diff --git a/Add-riscv64-support.patch b/Add-riscv64-support.patch index 59017ae02c5b6c185a441f428acd08dfc203eb6e..b06b0fe866fb0ae52a579323d4ef4e80effd0c9b 100644 --- a/Add-riscv64-support.patch +++ b/Add-riscv64-support.patch @@ -1,346 +1,129 @@ -From dfa792539047c39d0d25244265bc8368163d5768 Mon Sep 17 00:00:00 2001 -From: Fei Yang -Date: Thu, 24 Mar 2022 09:22:46 +0000 -Subject: [PATCH 001/140] Cherry-picked JDK-8276799: initial load of RISC-V - backend (cannot pass compilation) - ---- - make/autoconf/build-aux/config.guess | 2 +- - make/autoconf/hotspot.m4 | 3 +- - make/autoconf/libraries.m4 | 8 +- - make/autoconf/platform.m4 | 6 +- - make/hotspot/gensrc/GensrcAdlc.gmk | 9 +- - .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 6 +- - src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp | 7 +- - src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp | 8 +- - .../cpu/riscv/abstractInterpreter_riscv.cpp | 177 + - src/hotspot/cpu/riscv/assembler_riscv.cpp | 372 + - src/hotspot/cpu/riscv/assembler_riscv.hpp | 3047 +++++ - .../cpu/riscv/assembler_riscv.inline.hpp | 47 + - src/hotspot/cpu/riscv/bytes_riscv.hpp | 167 + - src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 353 + - src/hotspot/cpu/riscv/c1_Defs_riscv.hpp | 84 + - .../cpu/riscv/c1_FpuStackSim_riscv.cpp | 30 + - .../cpu/riscv/c1_FpuStackSim_riscv.hpp | 32 + - src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 388 + - src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp | 148 + - .../cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 281 + - .../cpu/riscv/c1_LIRAssembler_arith_riscv.hpp | 37 + - .../riscv/c1_LIRAssembler_arraycopy_riscv.cpp | 388 + - .../riscv/c1_LIRAssembler_arraycopy_riscv.hpp | 52 + - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 2267 ++++ - .../cpu/riscv/c1_LIRAssembler_riscv.hpp | 132 + - .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 1075 ++ - src/hotspot/cpu/riscv/c1_LIR_riscv.cpp | 55 + - src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp | 33 + - src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp | 83 + - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 432 + - .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 120 + - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1172 ++ - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 65 + - .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 1646 +++ - .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 193 + - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 83 + - src/hotspot/cpu/riscv/c2_init_riscv.cpp | 38 + - .../riscv/c2_safepointPollStubTable_riscv.cpp | 47 + - src/hotspot/cpu/riscv/codeBuffer_riscv.hpp | 36 + - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 149 + - src/hotspot/cpu/riscv/copy_riscv.hpp | 136 + - src/hotspot/cpu/riscv/disassembler_riscv.hpp | 58 + - .../cpu/riscv/foreign_globals_riscv.cpp | 44 + - .../cpu/riscv/foreign_globals_riscv.hpp | 32 + - src/hotspot/cpu/riscv/frame_riscv.cpp | 697 + - src/hotspot/cpu/riscv/frame_riscv.hpp | 202 + - src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 248 + - .../gc/g1/g1BarrierSetAssembler_riscv.cpp | 484 + - .../gc/g1/g1BarrierSetAssembler_riscv.hpp | 78 + - .../cpu/riscv/gc/g1/g1Globals_riscv.hpp | 31 + - .../gc/shared/barrierSetAssembler_riscv.cpp | 302 + - .../gc/shared/barrierSetAssembler_riscv.hpp | 79 + - .../gc/shared/barrierSetNMethod_riscv.cpp | 171 + - .../cardTableBarrierSetAssembler_riscv.cpp | 111 + - .../cardTableBarrierSetAssembler_riscv.hpp | 42 + - .../modRefBarrierSetAssembler_riscv.cpp | 55 + - .../modRefBarrierSetAssembler_riscv.hpp | 55 + - .../c1/shenandoahBarrierSetC1_riscv.cpp | 117 + - .../shenandoahBarrierSetAssembler_riscv.cpp | 712 ++ - .../shenandoahBarrierSetAssembler_riscv.hpp | 88 + 
- .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 285 + - .../riscv/gc/z/zBarrierSetAssembler_riscv.cpp | 441 + - .../riscv/gc/z/zBarrierSetAssembler_riscv.hpp | 101 + - src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp | 212 + - src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp | 36 + - src/hotspot/cpu/riscv/gc/z/z_riscv64.ad | 233 + - .../cpu/riscv/globalDefinitions_riscv.hpp | 52 + - src/hotspot/cpu/riscv/globals_riscv.hpp | 99 + - src/hotspot/cpu/riscv/icBuffer_riscv.cpp | 79 + - src/hotspot/cpu/riscv/icache_riscv.cpp | 51 + - src/hotspot/cpu/riscv/icache_riscv.hpp | 42 + - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1940 +++ - src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 285 + - src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 295 + - src/hotspot/cpu/riscv/interpreterRT_riscv.hpp | 68 + - .../cpu/riscv/javaFrameAnchor_riscv.hpp | 86 + - .../cpu/riscv/jniFastGetField_riscv.cpp | 214 + - src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 106 + - .../cpu/riscv/macroAssembler_riscv.cpp | 4016 ++++++ - .../cpu/riscv/macroAssembler_riscv.hpp | 858 ++ - .../cpu/riscv/macroAssembler_riscv.inline.hpp | 31 + - src/hotspot/cpu/riscv/matcher_riscv.hpp | 169 + - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 461 + - src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 57 + - src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 429 + - src/hotspot/cpu/riscv/nativeInst_riscv.hpp | 572 + - src/hotspot/cpu/riscv/registerMap_riscv.cpp | 45 + - src/hotspot/cpu/riscv/registerMap_riscv.hpp | 43 + - src/hotspot/cpu/riscv/register_riscv.cpp | 73 + - src/hotspot/cpu/riscv/register_riscv.hpp | 324 + - src/hotspot/cpu/riscv/relocInfo_riscv.cpp | 113 + - src/hotspot/cpu/riscv/relocInfo_riscv.hpp | 44 + - src/hotspot/cpu/riscv/riscv.ad | 10611 ++++++++++++++++ - src/hotspot/cpu/riscv/riscv_b.ad | 527 + - src/hotspot/cpu/riscv/riscv_v.ad | 2065 +++ - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2761 ++++ - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 3864 ++++++ - src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 58 + - src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 161 + - .../templateInterpreterGenerator_riscv.cpp | 1794 +++ - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 3951 ++++++ - src/hotspot/cpu/riscv/templateTable_riscv.hpp | 42 + - .../riscv/universalNativeInvoker_riscv.cpp | 33 + - .../cpu/riscv/universalUpcallHandle_riscv.cpp | 42 + - src/hotspot/cpu/riscv/vmStructs_riscv.hpp | 42 + - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 230 + - src/hotspot/cpu/riscv/vm_version_riscv.hpp | 72 + - src/hotspot/cpu/riscv/vmreg_riscv.cpp | 64 + - src/hotspot/cpu/riscv/vmreg_riscv.hpp | 68 + - src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp | 46 + - src/hotspot/cpu/riscv/vtableStubs_riscv.cpp | 260 + - src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp | 9 +- - src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 7 +- - src/hotspot/os/linux/os_linux.cpp | 2 + - .../linux_riscv/assembler_linux_riscv.cpp | 26 + - .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 134 + - .../os_cpu/linux_riscv/bytes_linux_riscv.hpp | 45 + - .../os_cpu/linux_riscv/copy_linux_riscv.hpp | 31 + - .../linux_riscv/gc/z/zSyscall_linux_riscv.hpp | 42 + - .../linux_riscv/globals_linux_riscv.hpp | 43 + - .../linux_riscv/orderAccess_linux_riscv.hpp | 63 + - .../os_cpu/linux_riscv/os_linux_riscv.cpp | 466 + - .../os_cpu/linux_riscv/os_linux_riscv.hpp | 59 + - .../prefetch_linux_riscv.inline.hpp | 38 + - .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 92 + - .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 48 + - .../linux_riscv/vmStructs_linux_riscv.hpp | 55 + - 
.../linux_riscv/vm_version_linux_riscv.cpp | 118 + - src/hotspot/share/c1/c1_LIR.cpp | 112 +- - src/hotspot/share/c1/c1_LIR.hpp | 209 +- - src/hotspot/share/c1/c1_LIRAssembler.cpp | 15 +- - src/hotspot/share/c1/c1_LIRAssembler.hpp | 5 +- - src/hotspot/share/c1/c1_LinearScan.cpp | 18 +- - .../gc/shenandoah/shenandoahArguments.cpp | 4 +- - src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp | 4 +- - .../share/jfr/utilities/jfrBigEndian.hpp | 2 +- - src/hotspot/share/opto/regmask.hpp | 2 +- - .../share/runtime/abstract_vm_version.cpp | 3 +- - src/hotspot/share/runtime/synchronizer.cpp | 2 +- - src/hotspot/share/runtime/thread.hpp | 2 +- - src/hotspot/share/runtime/thread.inline.hpp | 4 +- - src/hotspot/share/utilities/macros.hpp | 26 + - .../native/libsaproc/LinuxDebuggerLocal.c | 49 +- - .../linux/native/libsaproc/libproc.h | 4 +- - .../classes/sun/jvm/hotspot/HotSpotAgent.java | 3 + - .../debugger/MachineDescriptionRISCV64.java | 40 + - .../debugger/linux/LinuxCDebugger.java | 13 +- - .../linux/riscv64/LinuxRISCV64CFrame.java | 90 + - .../riscv64/LinuxRISCV64ThreadContext.java | 48 + - .../proc/riscv64/ProcRISCV64Thread.java | 88 + - .../riscv64/ProcRISCV64ThreadContext.java | 48 + - .../riscv64/ProcRISCV64ThreadFactory.java | 46 + - .../remote/riscv64/RemoteRISCV64Thread.java | 55 + - .../riscv64/RemoteRISCV64ThreadContext.java | 48 + - .../riscv64/RemoteRISCV64ThreadFactory.java | 46 + - .../debugger/risv64/RISCV64ThreadContext.java | 172 + - .../sun/jvm/hotspot/runtime/Threads.java | 5 +- - .../LinuxRISCV64JavaThreadPDAccess.java | 134 + - .../riscv64/RISCV64CurrentFrameGuess.java | 223 + - .../hotspot/runtime/riscv64/RISCV64Frame.java | 556 + - .../riscv64/RISCV64JavaCallWrapper.java | 61 + - .../runtime/riscv64/RISCV64RegisterMap.java | 53 + - .../jvm/hotspot/utilities/PlatformInfo.java | 4 +- - test/hotspot/jtreg/compiler/c2/TestBit.java | 7 +- - ...eSHA1IntrinsicsOptionOnUnsupportedCPU.java | 5 +- - ...HA256IntrinsicsOptionOnUnsupportedCPU.java | 5 +- - ...HA512IntrinsicsOptionOnUnsupportedCPU.java | 5 +- - .../cli/TestUseSHAOptionOnUnsupportedCPU.java | 5 +- - .../testcases/GenericTestCaseForOtherCPU.java | 11 +- - ...nericTestCaseForUnsupportedRISCV64CPU.java | 115 + - .../loopopts/superword/ProdRed_Double.java | 4 +- - .../loopopts/superword/ProdRed_Float.java | 4 +- - .../loopopts/superword/ProdRed_Int.java | 4 +- - .../loopopts/superword/ReductionPerf.java | 4 +- - .../superword/SumRedAbsNeg_Double.java | 4 +- - .../superword/SumRedAbsNeg_Float.java | 4 +- - .../loopopts/superword/SumRedSqrt_Double.java | 4 +- - .../loopopts/superword/SumRed_Double.java | 4 +- - .../loopopts/superword/SumRed_Float.java | 4 +- - .../loopopts/superword/SumRed_Int.java | 4 +- - .../sha/predicate/IntrinsicPredicates.java | 11 +- - .../NMT/CheckForProperDetailStackTrace.java | 4 +- - .../ReservedStack/ReservedStackTest.java | 4 +- - .../HeapMonitorEventsForTwoThreadsTest.java | 1 - - ...stMutuallyExclusivePlatformPredicates.java | 2 +- - .../jdk/jfr/event/os/TestCPUInformation.java | 6 +- - test/lib/jdk/test/lib/Platform.java | 4 + - 187 files changed, 59079 insertions(+), 189 deletions(-) - create mode 100644 src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/bytes_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp - create mode 100644 
src/hotspot/cpu/riscv/c1_Defs_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIR_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c2_globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c2_init_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/codeBuffer_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/compiledIC_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/copy_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/disassembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/frame_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/frame_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/frame_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad - create mode 100644 src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp - create mode 100644 
src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/z/z_riscv64.ad - create mode 100644 src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/icBuffer_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/icache_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/icache_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/interp_masm_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/interp_masm_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/interpreterRT_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/interpreterRT_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/jniTypes_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/matcher_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/register_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/register_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/relocInfo_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/relocInfo_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/riscv.ad - create mode 100644 src/hotspot/cpu/riscv/riscv_b.ad - create mode 100644 src/hotspot/cpu/riscv/riscv_v.ad - create mode 100644 src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/stubGenerator_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/stubRoutines_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/stubRoutines_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/templateTable_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/templateTable_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vmStructs_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/vtableStubs_riscv.cpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp - create mode 100644 
src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java - create mode 100644 test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java - -diff --git a/make/autoconf/build-aux/config.guess b/make/autoconf/build-aux/config.guess -index a88a9adec3f..15111d827ab 100644 ---- a/make/autoconf/build-aux/config.guess -+++ b/make/autoconf/build-aux/config.guess +diff --git a/.github/workflows/build-cross-compile.yml b/.github/workflows/build-cross-compile.yml +index 385b097b9f..b1c333f711 100644 +--- a/.github/workflows/build-cross-compile.yml ++++ b/.github/workflows/build-cross-compile.yml +@@ -54,28 +54,39 @@ jobs: + - arm + - s390x + - ppc64le ++ - riscv64 + include: + - target-cpu: aarch64 + gnu-arch: aarch64 + debian-arch: arm64 + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ tolerate-sysroot-errors: false + - target-cpu: arm + gnu-arch: arm + debian-arch: armhf + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ 
tolerate-sysroot-errors: false + gnu-abi: eabihf + - target-cpu: s390x + gnu-arch: s390x + debian-arch: s390x + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ tolerate-sysroot-errors: false + - target-cpu: ppc64le + gnu-arch: powerpc64le + debian-arch: ppc64el + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ tolerate-sysroot-errors: false ++ - target-cpu: riscv64 ++ gnu-arch: riscv64 ++ debian-arch: riscv64 ++ debian-repository: https://httpredir.debian.org/debian/ ++ debian-version: sid ++ tolerate-sysroot-errors: true + + steps: + - name: 'Checkout the JDK source' +@@ -113,6 +124,7 @@ jobs: + if: steps.get-cached-sysroot.outputs.cache-hit != 'true' + + - name: 'Create sysroot' ++ id: create-sysroot + run: > + sudo debootstrap + --arch=${{ matrix.debian-arch }} +@@ -123,6 +135,7 @@ jobs: + ${{ matrix.debian-version }} + sysroot + ${{ matrix.debian-repository }} ++ continue-on-error: ${{ matrix.tolerate-sysroot-errors }} + if: steps.get-cached-sysroot.outputs.cache-hit != 'true' + + - name: 'Prepare sysroot' +@@ -134,7 +147,12 @@ jobs: + rm -rf sysroot/usr/{sbin,bin,share} + rm -rf sysroot/usr/lib/{apt,gcc,udev,systemd} + rm -rf sysroot/usr/libexec/gcc +- if: steps.get-cached-sysroot.outputs.cache-hit != 'true' ++ if: steps.create-sysroot.outcome == 'success' && steps.get-cached-sysroot.outputs.cache-hit != 'true' ++ ++ - name: 'Remove broken sysroot' ++ run: | ++ sudo rm -rf sysroot/ ++ if: steps.create-sysroot.outcome != 'success' && steps.get-cached-sysroot.outputs.cache-hit != 'true' + + - name: 'Configure' + run: > +@@ -153,6 +171,7 @@ jobs: + echo "Dumping config.log:" && + cat config.log && + exit 1) ++ if: steps.create-sysroot.outcome == 'success' || steps.get-cached-sysroot.outputs.cache-hit == 'true' + + - name: 'Build' + id: build +@@ -160,3 +179,4 @@ jobs: + with: + make-target: 'hotspot ${{ inputs.make-arguments }}' + platform: linux-${{ matrix.target-cpu }} ++ if: steps.create-sysroot.outcome == 'success' || steps.get-cached-sysroot.outputs.cache-hit == 'true' +diff --git a/.jcheck/conf b/.jcheck/conf +index be7ad6d26f..e35eb77696 100644 +--- a/.jcheck/conf ++++ b/.jcheck/conf +@@ -1,5 +1,5 @@ + [general] +-project=jdk-updates ++project=riscv-port + jbs=JDK + version=11.0.24 + +diff --git a/make/autoconf/build-aux/config.sub b/make/autoconf/build-aux/config.sub +index 3c280ac7c0..6c66c221e0 100644 +--- a/make/autoconf/build-aux/config.sub ++++ b/make/autoconf/build-aux/config.sub @@ -1,6 +1,6 @@ #!/bin/sh # --# Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights reserved. -+# Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. - # Copyright (c) 2021, Azul Systems, Inc. All rights reserved. +-# Copyright (c) 2014, 2020, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # + # This code is free software; you can redistribute it and/or modify it +@@ -40,6 +40,13 @@ if echo $* | grep pc-msys >/dev/null ; then + exit + fi + ++# Canonicalize for riscv which autoconf-config.sub doesn't handle ++if echo $* | grep '^riscv\(32\|64\)-linux' >/dev/null ; then ++ result=`echo $@ | sed 's/linux/unknown-linux/'` ++ echo $result ++ exit ++fi ++ + # First, filter out everything that doesn't begin with "aarch64-" + if ! echo $* | grep '^aarch64-' >/dev/null ; then + . 
$DIR/autoconf-config.sub "$@" diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 -index 9bb34363e5c..f84e8f84c60 100644 +index 9bb34363e5..f84e8f84c6 100644 --- a/make/autoconf/hotspot.m4 +++ b/make/autoconf/hotspot.m4 @@ -370,7 +370,8 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], @@ -353,32 +136,8 @@ index 9bb34363e5c..f84e8f84c60 100644 AC_MSG_RESULT([yes]) else DISABLED_JVM_FEATURES="$DISABLED_JVM_FEATURES shenandoahgc" -diff --git a/make/autoconf/libraries.m4 b/make/autoconf/libraries.m4 -index 16e906bdc6a..5c49fd9285d 100644 ---- a/make/autoconf/libraries.m4 -+++ b/make/autoconf/libraries.m4 -@@ -1,5 +1,5 @@ - # --# Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. -+# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. - # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - # - # This code is free software; you can redistribute it and/or modify it -@@ -130,6 +130,12 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], - BASIC_JVM_LIBS="$BASIC_JVM_LIBS -lthread" - fi - -+ # Because RISC-V only has word-sized atomics, it requries libatomic where -+ # other common architectures do not. So link libatomic by default. -+ if test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xriscv64; then -+ BASIC_JVM_LIBS="$BASIC_JVM_LIBS -latomic" -+ fi -+ - # perfstat lib - if test "x$OPENJDK_TARGET_OS" = xaix; then - BASIC_JVM_LIBS="$BASIC_JVM_LIBS -lperfstat" diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 -index 26a58eb2ee8..67972d89248 100644 +index 5d1d9efa39..565ca18e20 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 @@ -1,5 +1,5 @@ @@ -397,17 +156,8 @@ index 26a58eb2ee8..67972d89248 100644 # The cpu defines below are for zero, we don't support them directly. elif test "x$OPENJDK_$1_CPU" = xsparc; then -@@ -564,8 +566,6 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], - HOTSPOT_$1_CPU_DEFINE=S390 - elif test "x$OPENJDK_$1_CPU" = xs390x; then - HOTSPOT_$1_CPU_DEFINE=S390 -- elif test "x$OPENJDK_$1_CPU" = xriscv64; then -- HOTSPOT_$1_CPU_DEFINE=RISCV - elif test "x$OPENJDK_$1_CPU" = xloongarch64; then - HOTSPOT_$1_CPU_DEFINE=LOONGARCH64 - elif test "x$OPENJDK_$1_CPU" != x; then diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk -index c5a3ac5724b..67f4c6f0574 100644 +index c5a3ac5724..51137b99db 100644 --- a/make/hotspot/gensrc/GensrcAdlc.gmk +++ b/make/hotspot/gensrc/GensrcAdlc.gmk @@ -1,5 +1,5 @@ @@ -417,13 +167,12 @@ index c5a3ac5724b..67f4c6f0574 100644 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # This code is free software; you can redistribute it and/or modify it -@@ -150,6 +150,13 @@ ifeq ($(call check-jvm-feature, compiler2), true) +@@ -150,6 +150,12 @@ ifeq ($(call check-jvm-feature, compiler2), true) $d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \ ))) + ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ -+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_v.ad \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_b.ad \ + ))) + endif @@ -431,79 +180,9 @@ index c5a3ac5724b..67f4c6f0574 100644 ifeq ($(call check-jvm-feature, shenandoahgc), true) AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ -diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp -index fdd2c0ca3d7..63f193de86e 100644 ---- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp -+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * -@@ -1593,7 +1593,9 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - } - - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on aarch64"); - - Assembler::Condition acond, ncond; - switch (condition) { -diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp -index f0a7229aa18..cb095052534 100644 ---- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp -+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -1824,7 +1824,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - } - - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on arm"); -+ - AsmCondition acond = al; - AsmCondition ncond = nv; - if (opr1 != opr2) { -diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -index 847f7d61d2f..d74db914331 100644 ---- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -@@ -1,6 +1,6 @@ - /* -- * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2012, 2019, SAP SE. 
All rights reserved. -+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2012, 2021 SAP SE. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -1553,8 +1553,10 @@ inline void load_to_reg(LIR_Assembler *lasm, LIR_Opr src, LIR_Opr dst) { - } - } - -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on ppc"); - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { - if (opr1->is_equal(opr2) || opr1->is_same_register(opr2)) { - load_to_reg(this, opr1, result); // Condition doesn't matter. - return; diff --git a/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp new file mode 100644 -index 00000000000..31c63abe71d +index 0000000000..31c63abe71 --- /dev/null +++ b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp @@ -0,0 +1,177 @@ @@ -686,10 +365,10 @@ index 00000000000..31c63abe71d +} diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp new file mode 100644 -index 00000000000..f15ef5304c5 +index 0000000000..a83d43a8f1 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp -@@ -0,0 +1,372 @@ +@@ -0,0 +1,365 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. @@ -974,9 +653,9 @@ index 00000000000..f15ef5304c5 + } +#endif + assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (uintptr_t)-1), -+ "48-bit overflow in address constant"); -+ // Load upper 32 bits -+ int32_t imm = imm64 >> 16; ++ "bit 47 overflows in address constant"); ++ // Load upper 31 bits ++ int32_t imm = imm64 >> 17; + int64_t upper = imm, lower = imm; + lower = (lower << 52) >> 52; + upper -= lower; @@ -984,13 +663,13 @@ index 00000000000..f15ef5304c5 + lui(Rd, upper); + addi(Rd, Rd, lower); + -+ // Load the rest 16 bits. ++ // Load the rest 17 bits. + slli(Rd, Rd, 11); -+ addi(Rd, Rd, (imm64 >> 5) & 0x7ff); -+ slli(Rd, Rd, 5); ++ addi(Rd, Rd, (imm64 >> 6) & 0x7ff); ++ slli(Rd, Rd, 6); + + // This offset will be used by following jalr/ld. -+ offset = imm64 & 0x1f; ++ offset = imm64 & 0x3f; +} + +void Assembler::movptr(Register Rd, uintptr_t imm64) { @@ -1003,13 +682,6 @@ index 00000000000..f15ef5304c5 + addi(Rd, Rd, offset); +} + -+void Assembler::ifence() { -+ fence_i(); -+ if (UseConservativeFence) { -+ fence(ir, ir); -+ } -+} -+ +#define INSN(NAME, NEG_INSN) \ + void Assembler::NAME(Register Rs, Register Rt, const address &dest) { \ + NEG_INSN(Rt, Rs, dest); \ @@ -1064,10 +736,10 @@ index 00000000000..f15ef5304c5 +} diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp new file mode 100644 -index 00000000000..4923962a496 +index 0000000000..9e7d271860 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -0,0 +1,3047 @@ +@@ -0,0 +1,3057 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. 
@@ -1253,13 +925,22 @@ index 00000000000..4923962a496 + : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { } + Address(Register r) + : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } -+ -+ template::value)> -+ Address(Register r, T o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) {} -+ ++ Address(Register r, int o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, long long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, unsigned int o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, unsigned long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, unsigned long long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++#ifdef ASSERT + Address(Register r, ByteSize disp) -+ : Address(r, in_bytes(disp)) {} ++ : _base(r), _index(noreg), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(0) { } ++#endif + Address(address target, RelocationHolder const& rspec) + : _base(noreg), + _index(noreg), @@ -1338,14 +1019,6 @@ index 00000000000..4923962a496 + + enum { instruction_size = 4 }; + -+ //---< calculate length of instruction >--- -+ // We just use the values set above. -+ // instruction must start at passed address -+ static unsigned int instr_len(unsigned char *instr) { return instruction_size; } -+ -+ //---< longest instructions >--- -+ static unsigned int instr_maxlen() { return instruction_size; } -+ + enum RoundingMode { + rne = 0b000, // round to Nearest, ties to Even + rtz = 0b001, // round towards Zero @@ -1387,7 +1060,6 @@ index 00000000000..4923962a496 + void movptr(Register Rd, address addr); + void movptr_with_offset(Register Rd, address addr, int32_t &offset); + void movptr(Register Rd, uintptr_t imm64); -+ void ifence(); + void j(const address &dest, Register temp = t0); + void j(const Address &adr, Register temp = t0); + void j(Label &l, Register temp = t0); @@ -1966,7 +1638,6 @@ index 00000000000..4923962a496 + emit(insn); \ + } + -+ INSN(fence_i, 0b0001111, 0b001, 0b000000000000); + INSN(ecall, 0b1110011, 0b000, 0b000000000000); + INSN(_ebreak, 0b1110011, 0b000, 0b000000000001); + @@ -3014,6 +2685,7 @@ index 00000000000..4923962a496 + +// ==================================== +// RISC-V Bit-Manipulation Extension ++// Currently only support Zba, Zbb and Zbs bitmanip extensions. 
+// ==================================== +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ @@ -3088,6 +2760,7 @@ index 00000000000..4923962a496 + + INSN(rori, 0b0010011, 0b101, 0b011000); + INSN(slli_uw, 0b0011011, 0b001, 0b000010); ++ INSN(bexti, 0b0010011, 0b101, 0b010010); + +#undef INSN + @@ -4097,6 +3770,13 @@ index 00000000000..4923962a496 + Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(false) { + } + ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ ShouldNotCallThis(); ++ return RegisterOrConstant(); ++ } ++ + // Stack overflow checking + virtual void bang_stack_with_offset(int offset) { Unimplemented(); } + @@ -4114,10 +3794,12 @@ index 00000000000..4923962a496 + virtual ~Assembler() {} +}; + ++class BiasedLockingCounters; ++ +#endif // CPU_RISCV_ASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp new file mode 100644 -index 00000000000..7ffe8803985 +index 0000000000..7ffe880398 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp @@ -0,0 +1,47 @@ @@ -4170,10 +3852,10 @@ index 00000000000..7ffe8803985 +#endif // CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp new file mode 100644 -index 00000000000..23d982f9abd +index 0000000000..f60e0e38ae --- /dev/null +++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp -@@ -0,0 +1,167 @@ +@@ -0,0 +1,165 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016 SAP SE. All rights reserved. @@ -4203,8 +3885,6 @@ index 00000000000..23d982f9abd +#ifndef CPU_RISCV_BYTES_RISCV_HPP +#define CPU_RISCV_BYTES_RISCV_HPP + -+#include "memory/allStatic.hpp" -+ +class Bytes: AllStatic { + public: + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering @@ -4343,10 +4023,10 @@ index 00000000000..23d982f9abd +#endif // CPU_RISCV_BYTES_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp new file mode 100644 -index 00000000000..dcd0472c540 +index 0000000000..12980c12de --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -@@ -0,0 +1,353 @@ +@@ -0,0 +1,339 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
@@ -4388,20 +4068,6 @@ index 00000000000..dcd0472c540 + +#define __ ce->masm()-> + -+void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { -+ __ bind(_entry); -+ InternalAddress safepoint_pc(__ pc() - __ offset() + safepoint_offset()); -+ __ code_section()->relocate(__ pc(), safepoint_pc.rspec()); -+ __ la(t0, safepoint_pc.target()); -+ __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); -+ -+ assert(SharedRuntime::polling_page_return_handler_blob() != NULL, -+ "polling page return stub not created yet"); -+ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); -+ -+ __ far_jump(RuntimeAddress(stub)); -+} -+ +void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + Metadata *m = _method->as_constant_ptr()->as_metadata(); @@ -4421,7 +4087,7 @@ index 00000000000..dcd0472c540 +} + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) -+ : _index(index), _array(), _throw_index_out_of_bounds_exception(true) { ++ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} @@ -4446,7 +4112,7 @@ index 00000000000..dcd0472c540 + if (_throw_index_out_of_bounds_exception) { + stub_id = Runtime1::throw_index_exception_id; + } else { -+ assert(_array != LIR_Opr::nullOpr(), "sanity"); ++ assert(_array != NULL, "sanity"); + __ mv(t1, _array->as_pointer_register()); + stub_id = Runtime1::throw_range_check_failed_id; + } @@ -4653,7 +4319,7 @@ index 00000000000..dcd0472c540 + const int args_num = 5; + VMRegPair args[args_num]; + BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; -+ SharedRuntime::java_calling_convention(signature, args, args_num); ++ SharedRuntime::java_calling_convention(signature, args, args_num, true); + + // push parameters + Register r[args_num]; @@ -4702,7 +4368,7 @@ index 00000000000..dcd0472c540 +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp new file mode 100644 -index 00000000000..4417ad63091 +index 0000000000..4417ad6309 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp @@ -0,0 +1,84 @@ @@ -4792,7 +4458,7 @@ index 00000000000..4417ad63091 +#endif // CPU_RISCV_C1_DEFS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp new file mode 100644 -index 00000000000..e3a2606c532 +index 0000000000..e3a2606c53 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp @@ -0,0 +1,30 @@ @@ -4828,7 +4494,7 @@ index 00000000000..e3a2606c532 +// No FPU stack on RISCV diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp new file mode 100644 -index 00000000000..7bc3d311501 +index 0000000000..7bc3d31150 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp @@ -0,0 +1,32 @@ @@ -4866,7 +4532,7 @@ index 00000000000..7bc3d311501 +#endif // CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp new file mode 100644 -index 00000000000..172031941b2 +index 0000000000..682ebe8262 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp @@ -0,0 +1,388 @@ @@ -5028,8 +4694,8 @@ index 00000000000..172031941b2 +LIR_Opr FrameMap::fpu10_float_opr; +LIR_Opr FrameMap::fpu10_double_opr; + -+LIR_Opr FrameMap::_caller_save_cpu_regs[] = {}; -+LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; ++LIR_Opr 
FrameMap::_caller_save_cpu_regs[] = { 0, }; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; + +//-------------------------------------------------------- +// FrameMap @@ -5186,7 +4852,7 @@ index 00000000000..172031941b2 + + VMRegPair regs; + BasicType sig_bt = T_OBJECT; -+ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1); ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); + receiver_opr = as_oop_opr(regs.first()->as_Register()); + + for (i = 0; i < nof_caller_save_fpu_regs; i++) { @@ -5260,7 +4926,7 @@ index 00000000000..172031941b2 +} diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp new file mode 100644 -index 00000000000..01281f5c9e1 +index 0000000000..01281f5c9e --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp @@ -0,0 +1,148 @@ @@ -5414,10 +5080,10 @@ index 00000000000..01281f5c9e1 +#endif // CPU_RISCV_C1_FRAMEMAP_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp new file mode 100644 -index 00000000000..4c1c13dc290 +index 0000000000..2a99d49c94 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -@@ -0,0 +1,281 @@ +@@ -0,0 +1,285 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. @@ -5610,7 +5276,7 @@ index 00000000000..4c1c13dc290 + code == lir_add ? __ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c); + break; + case lir_div: -+ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move lreg_lo to dreg if divisor is 1 + __ mv(dreg, lreg_lo); @@ -5628,7 +5294,7 @@ index 00000000000..4c1c13dc290 + } + break; + case lir_rem: -+ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move 0 to dreg if divisor is 1 + __ mv(dreg, zr); @@ -5658,7 +5324,9 @@ index 00000000000..4c1c13dc290 + switch (code) { + case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + default: + ShouldNotReachHere(); @@ -5671,7 +5339,9 @@ index 00000000000..4c1c13dc290 + switch (code) { + case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + default: + ShouldNotReachHere(); @@ -5701,7 +5371,7 @@ index 00000000000..4c1c13dc290 +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp new file mode 100644 -index 
00000000000..ab0a9963fc1 +index 0000000000..ab0a9963fc --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp @@ -0,0 +1,37 @@ @@ -5744,7 +5414,7 @@ index 00000000000..ab0a9963fc1 +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp new file mode 100644 -index 00000000000..b7f53e395f3 +index 0000000000..b7f53e395f --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp @@ -0,0 +1,388 @@ @@ -6138,7 +5808,7 @@ index 00000000000..b7f53e395f3 +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp new file mode 100644 -index 00000000000..06a0f248ca6 +index 0000000000..06a0f248ca --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp @@ -0,0 +1,52 @@ @@ -6196,10 +5866,10 @@ index 00000000000..06a0f248ca6 +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp new file mode 100644 -index 00000000000..742c2126e60 +index 0000000000..1e482d7cc2 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -0,0 +1,2267 @@ +@@ -0,0 +1,2268 @@ +/* + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. @@ -6243,7 +5913,6 @@ index 00000000000..742c2126e60 +#include "oops/objArrayKlass.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" -+#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + +#ifndef PRODUCT @@ -6293,18 +5962,6 @@ index 00000000000..742c2126e60 + +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } + -+void LIR_Assembler::clinit_barrier(ciMethod* method) { -+ assert(VM_Version::supports_fast_class_init_checks(), "sanity"); -+ assert(!method->holder()->is_not_initialized(), "initialization should have been started"); -+ -+ Label L_skip_barrier; -+ -+ __ mov_metadata(t1, method->holder()->constant_encoding()); -+ __ clinit_barrier(t1, t0, &L_skip_barrier /* L_fast_path */); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -+ __ bind(L_skip_barrier); -+} -+ +LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; +} @@ -6569,11 +6226,7 @@ index 00000000000..742c2126e60 + if (method()->is_synchronized()) { + monitor_address(0, FrameMap::r10_opr); + stub = new MonitorExitStub(FrameMap::r10_opr, true, 0); -+ if (UseHeavyMonitors) { -+ __ j(*stub->entry()); -+ } else { -+ __ unlock_object(x15, x14, x10, *stub->entry()); -+ } ++ __ unlock_object(x15, x14, x10, *stub->entry()); + __ bind(*stub->continuation()); + } + @@ -6626,7 +6279,7 @@ index 00000000000..742c2126e60 + return offset; +} + -+void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { ++void LIR_Assembler::return_op(LIR_Opr result) { + assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == x10, "word returns are in x10"); + + // Pop the stack before the safepoint code @@ -6636,18 +6289,20 @@ index 00000000000..742c2126e60 + __ reserved_stack_check(); + } + -+ code_stub->set_safepoint_offset(__ offset()); -+ __ relocate(relocInfo::poll_return_type); -+ __ safepoint_poll(*code_stub->entry(), true /* at_return */, false /* acquire */, true /* in_nmethod */); ++ address 
polling_page(os::get_polling_page()); ++ __ read_polling_page(t0, polling_page, relocInfo::poll_return_type); + __ ret(); +} + +int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { ++ address polling_page(os::get_polling_page()); + guarantee(info != NULL, "Shouldn't be NULL"); -+ __ get_polling_page(t0, relocInfo::poll_type); ++ assert(os::is_poll_address(polling_page), "should be"); ++ int32_t offset = 0; ++ __ get_polling_page(t0, polling_page, offset, relocInfo::poll_type); + add_debug_info_for_branch(info); // This isn't just debug info: + // it's the oop map -+ __ read_polling_page(t0, 0, relocInfo::poll_type); ++ __ read_polling_page(t0, offset, relocInfo::poll_type); + return __ offset(); +} + @@ -6878,7 +6533,7 @@ index 00000000000..742c2126e60 + } +} + -+void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide) { ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { + LIR_Address* to_addr = dest->as_address_ptr(); + // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src + Register compressed_src = t1; @@ -7000,7 +6655,7 @@ index 00000000000..742c2126e60 + reg2stack(temp, dest, dest->type(), false); +} + -+void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide) { ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { + assert(src->is_address(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + @@ -7045,7 +6700,14 @@ index 00000000000..742c2126e60 + __ ld(dest->as_register(), as_Address(from_addr)); + break; + case T_ADDRESS: -+ __ ld(dest->as_register(), as_Address(from_addr)); ++ // FIXME: OMG this is a horrible kludge. Any offset from an ++ // address that matches klass_offset_in_bytes() will be loaded ++ // as a word, not a long. 
++ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ __ lwu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld(dest->as_register(), as_Address(from_addr)); ++ } + break; + case T_INT: + __ lw(dest->as_register(), as_Address(from_addr)); @@ -7073,10 +6735,10 @@ index 00000000000..742c2126e60 + if (UseCompressedOops && !wide) { + __ decode_heap_oop(dest->as_register()); + } -+ -+ if (!UseZGC) { -+ // Load barrier has not yet been applied, so ZGC can't verify the oop here -+ __ verify_oop(dest->as_register()); ++ __ verify_oop(dest->as_register()); ++ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ if (UseCompressedClassPointers) { ++ __ decode_klass_not_null(dest->as_register()); + } + } +} @@ -7119,11 +6781,13 @@ index 00000000000..742c2126e60 + Label done; + move_op(opr2, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack ++ false, // unaligned + false); // wide + __ j(done); + __ bind(label); + move_op(opr1, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack ++ false, // unaligned + false); // wide + __ bind(done); +} @@ -7470,7 +7134,7 @@ index 00000000000..742c2126e60 + assert(op->addr()->is_address(), "what else?"); + LIR_Address* addr_ptr = op->addr()->as_address_ptr(); + assert(addr_ptr->disp() == 0, "need 0 disp"); -+ assert(addr_ptr->index() == LIR_Opr::illegalOpr(), "need 0 index"); ++ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); + addr = as_reg(addr_ptr->base()); + } + Register newval = as_reg(op->new_value()); @@ -7586,6 +7250,11 @@ index 00000000000..742c2126e60 + add_call_info(code_offset(), op->info()); +} + ++/* Currently, vtable-dispatch is only enabled for sparc platforms */ ++void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ++ ShouldNotReachHere(); ++} ++ +void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); + assert((__ offset() % 4) == 0, "bad alignment"); @@ -7711,12 +7380,16 @@ index 00000000000..742c2126e60 + Register obj = op->obj_opr()->as_register(); // may not be an oop + Register hdr = op->hdr_opr()->as_register(); + Register lock = op->lock_opr()->as_register(); -+ if (UseHeavyMonitors) { ++ if (!UseFastLocking) { + __ j(*op->stub()->entry()); + } else if (op->code() == lir_lock) { ++ Register scratch = noreg; ++ if (UseBiasedLocking) { ++ scratch = op->scratch_opr()->as_register(); ++ } + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + // add debug info for NullPointerException only if one is possible -+ int null_check_offset = __ lock_object(hdr, obj, lock, *op->stub()->entry()); ++ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); + if (op->info() != NULL) { + add_debug_info_for_null_check(null_check_offset, op->info()); + } @@ -7729,23 +7402,6 @@ index 00000000000..742c2126e60 + __ bind(*op->stub()->continuation()); +} + -+void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { -+ Register obj = op->obj()->as_pointer_register(); -+ Register result = op->result_opr()->as_pointer_register(); -+ -+ CodeEmitInfo* info = op->info(); -+ if (info != NULL) { -+ add_debug_info_for_null_check_here(info); -+ } -+ -+ if (UseCompressedClassPointers) { -+ __ lwu(result, Address(obj, oopDesc::klass_offset_in_bytes())); -+ __ decode_klass_not_null(result); -+ } else { -+ __ ld(result, Address(obj, oopDesc::klass_offset_in_bytes())); -+ } -+} -+ +void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { 
+ ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); @@ -8016,11 +7672,14 @@ index 00000000000..742c2126e60 + + +void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { -+ if (patch_code != lir_patch_none) { ++#if INCLUDE_SHENANDOAHGC ++ if (UseShenandoahGC && patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } ++#endif + ++ assert(patch_code == lir_patch_none, "Patch code not supported"); + LIR_Address* adr = addr->as_address_ptr(); + Register dst = dest->as_register_lo(); + @@ -8063,7 +7722,7 @@ index 00000000000..742c2126e60 + +void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { + if (dest->is_address() || src->is_address()) { -+ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /* wide */ false); ++ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /*unaligned*/ false, /* wide */ false); + } else { + ShouldNotReachHere(); + } @@ -8223,6 +7882,18 @@ index 00000000000..742c2126e60 + } +} + ++void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::reset_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::fpop() { Unimplemented(); } ++ ++void LIR_Assembler::fxch(int i) { Unimplemented(); } ++ ++void LIR_Assembler::fld(int i) { Unimplemented(); } ++ ++void LIR_Assembler::ffree(int i) { Unimplemented(); } ++ +void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, + Assembler::rl /* release */, t0, true /* result as bool */); @@ -8469,10 +8140,10 @@ index 00000000000..742c2126e60 +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp new file mode 100644 -index 00000000000..051328c3a8a +index 0000000000..5c81f1c704 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp -@@ -0,0 +1,132 @@ +@@ -0,0 +1,133 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. @@ -8548,6 +8219,7 @@ index 00000000000..051328c3a8a + // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) + _call_stub_size = 14 * NativeInstruction::instruction_size + + (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), ++ _call_aot_stub_size = 0, + // See emit_exception_handler for detail + // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) + _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller @@ -8607,10 +8279,10 @@ index 00000000000..051328c3a8a +#endif // CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp new file mode 100644 -index 00000000000..e126f148cdf +index 0000000000..c41819fc2a --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -0,0 +1,1075 @@ +@@ -0,0 +1,1094 @@ +/* + * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
@@ -8651,7 +8323,6 @@ index 00000000000..e126f148cdf +#include "ci/ciTypeArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" -+#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + +#ifdef ASSERT @@ -8819,6 +8490,7 @@ index 00000000000..e126f148cdf + break; + default: + ShouldNotReachHere(); ++ r = NULL; + } + return r; +} @@ -8890,6 +8562,11 @@ index 00000000000..e126f148cdf + + // "lock" stores the address of the monitor stack slot, so this is not an oop + LIR_Opr lock = new_register(T_INT); ++ // Need a scratch register for biased locking ++ LIR_Opr scratch = LIR_OprFact::illegalOpr; ++ if (UseBiasedLocking) { ++ scratch = new_register(T_INT); ++ } + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { @@ -8898,7 +8575,7 @@ index 00000000000..e126f148cdf + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), true); -+ monitor_enter(obj.result(), lock, syncTempOpr(), LIR_OprFact::illegalOpr, ++ monitor_enter(obj.result(), lock, syncTempOpr(), scratch, + x->monitor_no(), info_for_exception, info); +} + @@ -8968,7 +8645,12 @@ index 00000000000..e126f148cdf + right.load_item(); + + LIR_Opr reg = rlock(x); -+ arithmetic_op_fpu(x->op(), reg, left.result(), right.result()); ++ LIR_Opr tmp = LIR_OprFact::illegalOpr; ++ if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) { ++ tmp = new_register(T_DOUBLE); ++ } ++ ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); + + set_result(x, round_item(reg)); +} @@ -8990,7 +8672,7 @@ index 00000000000..e126f148cdf + // no need to do div-by-zero check if the divisor is a non-zero constant + if (c != 0) { need_zero_check = false; } + // do not load right if the divisor is a power-of-2 constant -+ if (c > 0 && is_power_of_2(c)) { ++ if (c > 0 && is_power_of_2_long(c)) { + right.dont_load_item(); + } else { + right.load_item(); @@ -9001,7 +8683,7 @@ index 00000000000..e126f148cdf + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); -+ __ branch(lir_cond_equal, new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info)); + } + + rlock_result(x); @@ -9075,7 +8757,7 @@ index 00000000000..e126f148cdf + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); -+ __ branch(lir_cond_equal, new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info)); + } + + LIR_Opr ill = LIR_OprFact::illegalOpr; @@ -9254,16 +8936,14 @@ index 00000000000..e126f148cdf + do_LibmIntrinsic(x); + break; + case vmIntrinsics::_dabs: // fall through -+ case vmIntrinsics::_dsqrt: // fall through -+ case vmIntrinsics::_dsqrt_strict: { ++ case vmIntrinsics::_dsqrt: { + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + + switch (x->id()) { -+ case vmIntrinsics::_dsqrt: // fall through -+ case vmIntrinsics::_dsqrt_strict: { ++ case vmIntrinsics::_dsqrt: { + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } @@ -9284,19 +8964,30 @@ index 00000000000..e126f148cdf +void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { + LIRItem value(x->argument_at(0), this); + value.set_destroys_register(); ++ + LIR_Opr 
calc_result = rlock_result(x); + LIR_Opr result_reg = result_register_for(x->type()); ++ + CallingConvention* cc = NULL; -+ BasicTypeList signature(1); -+ signature.append(T_DOUBLE); -+ if (x->id() == vmIntrinsics::_dpow) { signature.append(T_DOUBLE); } -+ cc = frame_map()->c_calling_convention(&signature); -+ value.load_item_force(cc->at(0)); ++ + if (x->id() == vmIntrinsics::_dpow) { + LIRItem value1(x->argument_at(1), this); ++ + value1.set_destroys_register(); ++ ++ BasicTypeList signature(2); ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); + value1.load_item_force(cc->at(1)); ++ } else { ++ BasicTypeList signature(1); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); + } ++ + switch (x->id()) { + case vmIntrinsics::_dexp: + if (StubRoutines::dexp() != NULL) { __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); } @@ -9663,9 +9354,9 @@ index 00000000000..e126f148cdf + profile_branch(x, cond); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { -+ __ branch(lir_cond(cond), x->tsux(), x->usux()); ++ __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); + } else { -+ __ branch(lir_cond(cond), x->tsux()); ++ __ branch(lir_cond(cond), right->type(), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); @@ -9688,7 +9379,7 @@ index 00000000000..e126f148cdf +} diff --git a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp new file mode 100644 -index 00000000000..5f1c394ab3d +index 0000000000..0317ed9003 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp @@ -0,0 +1,55 @@ @@ -9721,22 +9412,22 @@ index 00000000000..5f1c394ab3d +#include "asm/register.hpp" +#include "c1/c1_LIR.hpp" + -+FloatRegister LIR_Opr::as_float_reg() const { ++FloatRegister LIR_OprDesc::as_float_reg() const { + return as_FloatRegister(fpu_regnr()); +} + -+FloatRegister LIR_Opr::as_double_reg() const { ++FloatRegister LIR_OprDesc::as_double_reg() const { + return as_FloatRegister(fpu_regnrLo()); +} + +// Reg2 unused. 
+LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { + assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); -+ return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | -+ (reg1 << LIR_Opr::reg2_shift) | -+ LIR_Opr::double_type | -+ LIR_Opr::fpu_register | -+ LIR_Opr::double_size); ++ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | ++ (reg1 << LIR_OprDesc::reg2_shift) | ++ LIR_OprDesc::double_type | ++ LIR_OprDesc::fpu_register | ++ LIR_OprDesc::double_size); +} + +#ifndef PRODUCT @@ -9749,7 +9440,7 @@ index 00000000000..5f1c394ab3d +#endif // PRODUCT diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp new file mode 100644 -index 00000000000..78a61128bdd +index 0000000000..78a61128bd --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp @@ -0,0 +1,33 @@ @@ -9788,7 +9479,7 @@ index 00000000000..78a61128bdd +} diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp new file mode 100644 -index 00000000000..d7ca7b0fd05 +index 0000000000..d7ca7b0fd0 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp @@ -0,0 +1,83 @@ @@ -9877,10 +9568,10 @@ index 00000000000..d7ca7b0fd05 +#endif // CPU_RISCV_C1_LINEARSCAN_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp new file mode 100644 -index 00000000000..6f656c8c533 +index 0000000000..99d981f97f --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -0,0 +1,432 @@ +@@ -0,0 +1,443 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. @@ -9916,8 +9607,8 @@ index 00000000000..6f656c8c533 +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/arrayOop.hpp" -+#include "oops/markWord.hpp" +#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" @@ -9933,7 +9624,7 @@ index 00000000000..6f656c8c533 + } +} + -+int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { + const int aligned_mask = BytesPerWord - 1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); @@ -9945,19 +9636,17 @@ index 00000000000..6f656c8c533 + // save object being locked into the BasicObjectLock + sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + -+ null_check_offset = offset(); -+ -+ if (DiagnoseSyncOnValueBasedClasses != 0) { -+ load_klass(hdr, obj); -+ lwu(hdr, Address(hdr, Klass::access_flags_offset())); -+ andi(t0, hdr, JVM_ACC_IS_VALUE_BASED_CLASS); -+ bnez(t0, slow_case, true /* is_far */); ++ if (UseBiasedLocking) { ++ assert(scratch != noreg, "should have scratch register at this point"); ++ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); ++ } else { ++ null_check_offset = offset(); + } + + // Load object header + ld(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked -+ ori(hdr, hdr, markWord::unlocked_value); ++ ori(hdr, hdr, markOopDesc::unlocked_value); + // save unlocked object header into the displaced header location on the stack + 
sd(hdr, Address(disp_hdr, 0)); + // test if object header is still the same (i.e. unlocked), and if so, store the @@ -9988,6 +9677,10 @@ index 00000000000..6f656c8c533 + // otherwise we don't care about the result and handle locking via runtime call + bnez(hdr, slow_case, /* is_far */ true); + bind(done); ++ if (PrintBiasedLockingStatistics) { ++ la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); ++ add_memory_int32(Address(t1, 0), 1); ++ } + return null_check_offset; +} + @@ -9997,13 +9690,21 @@ index 00000000000..6f656c8c533 + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + ++ if (UseBiasedLocking) { ++ // load object ++ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ biased_locking_exit(obj, hdr, done); ++ } ++ + // load displaced header + ld(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + // if we had recursive locking, we are done + beqz(hdr, done); -+ // load object -+ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ if (!UseBiasedLocking) { ++ // load object ++ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ } + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to @@ -10030,8 +9731,13 @@ index 00000000000..6f656c8c533 + +void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { + assert_different_registers(obj, klass, len); -+ // This assumes that all prototype bits fitr in an int32_t -+ mv(tmp1, (int32_t)(intptr_t)markWord::prototype().value()); ++ if (UseBiasedLocking && !len->is_valid()) { ++ assert_different_registers(obj, klass, len, tmp1, tmp2); ++ ld(tmp1, Address(klass, Klass::prototype_header_offset())); ++ } else { ++ // This assumes that all prototype bits fitr in an int32_t ++ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); ++ } + sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) { // Take care not to kill klass @@ -10185,15 +9891,17 @@ index 00000000000..6f656c8c533 +} + +void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a J, JAL or NOP. ++ // Make it a NOP. ++ nop(); ++ + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); + // Make sure there is enough stack space for this method's activation. + // Note that we do this before creating a frame. + generate_stack_overflow_check(bang_size_in_bytes); + MacroAssembler::build_frame(framesize); -+ -+ // Insert nmethod entry barrier into frame. -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->nmethod_entry_barrier(this); +} + +void C1_MacroAssembler::remove_frame(int framesize) { @@ -10201,13 +9909,7 @@ index 00000000000..6f656c8c533 +} + + -+void C1_MacroAssembler::verified_entry(bool breakAtEntry) { -+ // If we have to make this method not-entrant we'll overwrite its -+ // first instruction with a jump. For this action to be legal we -+ // must ensure that this first instruction is a J, JAL or NOP. -+ // Make it a NOP. 
-+ -+ nop(); ++void C1_MacroAssembler::verified_entry() { +} + +void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { @@ -10315,10 +10017,10 @@ index 00000000000..6f656c8c533 +} diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp new file mode 100644 -index 00000000000..dfd3c17d7c7 +index 0000000000..1950cee5dd --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp -@@ -0,0 +1,120 @@ +@@ -0,0 +1,121 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. @@ -10380,8 +10082,9 @@ index 00000000000..dfd3c17d7c7 + // hdr : must be x10, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved ++ // scratch : scratch register, contents destroyed + // returns code offset at which to add null check debug information -+ int lock_object (Register swap, Register obj, Register disp_hdr, Label& slow_case); ++ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + + // unlocking + // hdr : contents destroyed @@ -10441,10 +10144,10 @@ index 00000000000..dfd3c17d7c7 +#endif // CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp new file mode 100644 -index 00000000000..f523c9ed50a +index 0000000000..329df2e1ca --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -0,0 +1,1172 @@ +@@ -0,0 +1,1210 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
@@ -10493,7 +10196,6 @@ index 00000000000..f523c9ed50a +#include "runtime/stubRoutines.hpp" +#include "runtime/vframe.hpp" +#include "runtime/vframeArray.hpp" -+#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + + @@ -10614,19 +10316,14 @@ index 00000000000..f523c9ed50a + return call_RT(oop_result, metadata_result, entry, arg_num); +} + -+enum return_state_t { -+ does_not_return, requires_return -+}; -+ +// Implementation of StubFrame + +class StubFrame: public StackObj { + private: + StubAssembler* _sasm; -+ bool _return_state; + + public: -+ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state=requires_return); ++ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); + void load_argument(int offset_in_words, Register reg); + + ~StubFrame(); @@ -10644,9 +10341,8 @@ index 00000000000..f523c9ed50a + +#define __ _sasm-> + -+StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state) { ++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { + _sasm = sasm; -+ _return_state = return_state; + __ prologue(name, must_gc_arguments); +} + @@ -10658,11 +10354,7 @@ index 00000000000..f523c9ed50a + + +StubFrame::~StubFrame() { -+ if (_return_state == requires_return) { -+ __ epilogue(); -+ } else { -+ __ should_not_reach_here(); -+ } ++ __ epilogue(); + _sasm = NULL; +} + @@ -10825,6 +10517,7 @@ index 00000000000..f523c9ed50a + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, oop_map); + ++ __ should_not_reach_here(); + return oop_maps; +} + @@ -10872,7 +10565,9 @@ index 00000000000..f523c9ed50a + sasm->set_frame_size(frame_size); + break; + } -+ default: ShouldNotReachHere(); ++ default: ++ __ should_not_reach_here(); ++ break; + } + + // verify that only x10 and x13 are valid at this time @@ -10928,6 +10623,9 @@ index 00000000000..f523c9ed50a + restore_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: ++ // Pop the return address. ++ __ leave(); ++ __ ret(); // jump to exception handler + break; + default: ShouldNotReachHere(); + } @@ -11032,37 +10730,80 @@ index 00000000000..f523c9ed50a +#endif + __ reset_last_Java_frame(true); + ++ // check for pending exceptions ++ { Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, L); ++ // exception pending => remove activation and forward to exception handler ++ ++ { Label L1; ++ __ bnez(x10, L1); // have we deoptimized? ++ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); ++ __ bind(L1); ++ } ++ ++ // the deopt blob expects exceptions in the special fields of ++ // JavaThread, so copy and clear pending exception. 
++ ++ // load and clear pending exception ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); ++ ++ // check that there is really a valid exception ++ __ verify_not_null_oop(x10); ++ ++ // load throwing pc: this is the return address of the stub ++ __ ld(x13, Address(fp, wordSize)); ++ +#ifdef ASSERT -+ // Check that fields in JavaThread for exception oop and issuing pc are empty -+ Label oop_empty; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, oop_empty); -+ __ stop("exception oop must be empty"); -+ __ bind(oop_empty); ++ // Check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); + -+ Label pc_empty; -+ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); -+ __ beqz(t0, pc_empty); -+ __ stop("exception pc must be empty"); -+ __ bind(pc_empty); ++ Label pc_empty; ++ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); ++ __ beqz(t0, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); +#endif + -+ // Runtime will return true if the nmethod has been deoptimized, this is the -+ // expected scenario and anything else is an error. Note that we maintain a -+ // check on the result purely as a defensive measure. -+ Label no_deopt; -+ __ beqz(x10, no_deopt); // Have we deoptimized? ++ // store exception oop and throwing pc to JavaThread ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ ++ restore_live_registers(sasm); ++ ++ __ leave(); ++ ++ // Forward the exception directly to deopt blob. We can blow no ++ // registers and must leave throwing pc on the stack. A patch may ++ // have values live in registers so the entry point with the ++ // exception in tls. ++ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); ++ ++ __ bind(L); ++ } + -+ // Perform a re-execute. The proper return address is already on the stack, -+ // we just need to restore registers, pop all of our frames but the return -+ // address and jump to the deopt blob. ++ // Runtime will return true if the nmethod has been deoptimized during ++ // the patching process. In that case we must do a deopt reexecute instead. ++ Label cont; ++ ++ __ beqz(x10, cont); // have we deoptimized? ++ ++ // Will reexecute. 
Proper return address is already on the stack we just restore ++ // registers, pop all of our frame but the return address and jump to the deopt blob + + restore_live_registers(sasm); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + -+ __ bind(no_deopt); -+ __ stop("deopt not performed"); ++ __ bind(cont); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ ret(); + + return oop_maps; +} @@ -11088,13 +10829,13 @@ index 00000000000..f523c9ed50a + + case throw_div0_exception_id: + { -+ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: -+ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); ++ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; @@ -11373,14 +11114,14 @@ index 00000000000..f523c9ed50a + + case throw_class_cast_exception_id: + { -+ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { -+ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, + CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } @@ -11474,7 +11215,7 @@ index 00000000000..f523c9ed50a + + case deoptimize_id: + { -+ StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments); + OopMap* oop_map = save_live_registers(sasm); + assert_cond(oop_map != NULL); + f.load_argument(0, c_rarg1); @@ -11493,7 +11234,7 @@ index 00000000000..f523c9ed50a + + case throw_range_check_failed_id: + { -+ StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; @@ -11509,7 +11250,7 @@ index 00000000000..f523c9ed50a + + case access_field_patching_id: + { -+ StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "access_field_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } @@ -11517,7 +11258,7 @@ index 00000000000..f523c9ed50a + + case load_klass_patching_id: + { -+ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } @@ -11525,7 +11266,7 @@ index 00000000000..f523c9ed50a + + case load_mirror_patching_id: + { -+ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); + // we should set up register map + 
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } @@ -11533,7 +11274,7 @@ index 00000000000..f523c9ed50a + + case load_appendix_patching_id: + { -+ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } @@ -11556,14 +11297,14 @@ index 00000000000..f523c9ed50a + + case throw_index_exception_id: + { -+ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + case throw_array_store_exception_id: + { -+ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); + // tos + 0: link + // + 1: return address + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); @@ -11572,7 +11313,7 @@ index 00000000000..f523c9ed50a + + case predicate_failed_trap_id: + { -+ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); + + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); @@ -11595,7 +11336,7 @@ index 00000000000..f523c9ed50a + StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); + save_live_registers(sasm); + -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), c_rarg0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), c_rarg0); + + restore_live_registers(sasm); + } @@ -11603,7 +11344,7 @@ index 00000000000..f523c9ed50a + + default: + { -+ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); + __ li(x10, (int) id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10); + __ should_not_reach_here(); @@ -11619,10 +11360,10 @@ index 00000000000..f523c9ed50a +const char *Runtime1::pd_name_for_address(address entry) { Unimplemented(); return 0; } diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp new file mode 100644 -index 00000000000..fe46f7b21c8 +index 0000000000..9316d4be02 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -0,0 +1,65 @@ +@@ -0,0 +1,71 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. @@ -11657,8 +11398,10 @@ index 00000000000..fe46f7b21c8 +// Sets the default values for platform dependent flags used by the client compiler. 
+// (see c1_globals.hpp) + -+#ifndef COMPILER2 ++#ifndef TIERED +define_pd_global(bool, BackgroundCompilation, true ); ++define_pd_global(bool, UseTLAB, true ); ++define_pd_global(bool, ResizeTLAB, true ); +define_pd_global(bool, InlineIntrinsics, true ); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, false); @@ -11667,6 +11410,7 @@ index 00000000000..fe46f7b21c8 +define_pd_global(intx, CompileThreshold, 1500 ); + +define_pd_global(intx, OnStackReplacePercentage, 933 ); ++define_pd_global(intx, FreqInlineSize, 325 ); +define_pd_global(intx, NewSizeThreadIncrease, 4*K ); +define_pd_global(intx, InitialCodeCacheSize, 160*K); +define_pd_global(intx, ReservedCodeCacheSize, 32*M ); @@ -11677,25 +11421,28 @@ index 00000000000..fe46f7b21c8 +define_pd_global(intx, CodeCacheExpansionSize, 32*K ); +define_pd_global(uintx, CodeCacheMinBlockLength, 1); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++define_pd_global(uintx, MetaspaceSize, 12*M ); +define_pd_global(bool, NeverActAsServerClassMachine, true ); -+define_pd_global(uint64_t, MaxRAM, 1ULL*G); ++define_pd_global(uint64_t, MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); -+#endif // !COMPILER2 ++#endif // !TIERED +define_pd_global(bool, UseTypeProfile, false); ++define_pd_global(bool, RoundFPResults, true ); + ++define_pd_global(bool, LIRFillDelaySlots, false); +define_pd_global(bool, OptimizeSinglePrecision, true ); +define_pd_global(bool, CSEArrayLength, false); +define_pd_global(bool, TwoOperandLIRForm, false); + +#endif // CPU_RISCV_C1_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp new file mode 100644 -index 00000000000..27770dc17aa +index 0000000000..3da1f1c6d8 --- /dev/null -+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp -@@ -0,0 +1,1646 @@ ++++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +@@ -0,0 +1,90 @@ +/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -11719,1635 +11466,1167 @@ index 00000000000..27770dc17aa + * + */ + -+#include "precompiled.hpp" -+#include "asm/assembler.hpp" -+#include "asm/assembler.inline.hpp" -+#include "opto/c2_MacroAssembler.hpp" -+#include "opto/intrinsicnode.hpp" -+#include "opto/subnode.hpp" -+#include "runtime/stubRoutines.hpp" ++#ifndef CPU_RISCV_C2_GLOBALS_RISCV_HPP ++#define CPU_RISCV_C2_GLOBALS_RISCV_HPP + -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#define STOP(error) stop(error) -+#else -+#define BLOCK_COMMENT(str) block_comment(str) -+#define STOP(error) block_comment(error); stop(error) -+#endif ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" + -+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. 
+ -+// short string -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void C2_MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL) -+{ -+ Register ch1 = t0; -+ Register index = t1; ++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++define_pd_global(bool, ProfileInterpreter, true); ++define_pd_global(bool, TieredCompilation, trueInTiered); ++define_pd_global(intx, CompileThreshold, 10000); + -+ BLOCK_COMMENT("string_indexof_char_short {"); ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 0); ++define_pd_global(intx, FLOATPRESSURE, 32); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 24); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); + -+ Label LOOP, LOOP1, LOOP4, LOOP8; -+ Label MATCH, MATCH1, MATCH2, MATCH3, -+ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); + -+ mv(result, -1); -+ mv(index, zr); ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. ++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, true); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); + -+ bind(LOOP); -+ addi(t0, index, 8); -+ ble(t0, cnt1, LOOP8); -+ addi(t0, index, 4); -+ ble(t0, cnt1, LOOP4); -+ j(LOOP1); ++define_pd_global(intx, ReservedCodeCacheSize, 48*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 22*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 6); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + -+ bind(LOOP8); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); -+ beq(ch, ch1, MATCH4); -+ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); -+ beq(ch, ch1, MATCH5); -+ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); -+ beq(ch, ch1, MATCH6); -+ isL ? 
lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); -+ beq(ch, ch1, MATCH7); -+ addi(index, index, 8); -+ addi(str1, str1, isL ? 8 : 16); -+ blt(index, cnt1, LOOP); -+ j(NOMATCH); ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); + -+ bind(LOOP4); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ addi(index, index, 4); -+ addi(str1, str1, isL ? 4 : 8); -+ bge(index, cnt1, NOMATCH); ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); + -+ bind(LOOP1); -+ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); -+ beq(ch, ch1, MATCH); -+ addi(index, index, 1); -+ addi(str1, str1, isL ? 1 : 2); -+ blt(index, cnt1, LOOP1); -+ j(NOMATCH); ++define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. + -+ bind(MATCH1); -+ addi(index, index, 1); -+ j(MATCH); ++#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c2_init_riscv.cpp b/src/hotspot/cpu/riscv/c2_init_riscv.cpp +new file mode 100644 +index 0000000000..cdbd69807b +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ bind(MATCH2); -+ addi(index, index, 2); -+ j(MATCH); ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" + -+ bind(MATCH3); -+ addi(index, index, 3); -+ j(MATCH); ++// processor dependent initialization for riscv + -+ bind(MATCH4); -+ addi(index, index, 4); -+ j(MATCH); ++extern void reg_mask_init(); + -+ bind(MATCH5); -+ addi(index, index, 5); -+ j(MATCH); ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++ reg_mask_init(); ++} +diff --git a/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp +new file mode 100644 +index 0000000000..14a68b4502 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp +@@ -0,0 +1,36 @@ ++/* ++ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ bind(MATCH6); -+ addi(index, index, 6); -+ j(MATCH); ++#ifndef CPU_RISCV_CODEBUFFER_RISCV_HPP ++#define CPU_RISCV_CODEBUFFER_RISCV_HPP + -+ bind(MATCH7); -+ addi(index, index, 7); ++private: ++ void pd_initialize() {} + -+ bind(MATCH); -+ mv(result, index); -+ bind(NOMATCH); -+ BLOCK_COMMENT("} string_indexof_char_short"); -+} ++public: ++ void flush_bundle(bool start_new_bundle) {} + -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL) -+{ -+ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; -+ Register ch1 = t0; -+ Register orig_cnt = t1; -+ Register mask1 = tmp3; -+ Register mask2 = tmp2; -+ Register match_mask = tmp1; -+ Register trailing_char = tmp4; -+ Register unaligned_elems = tmp4; ++#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +new file mode 100644 +index 0000000000..a4de342a93 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +@@ -0,0 +1,149 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ BLOCK_COMMENT("string_indexof_char {"); -+ beqz(cnt1, NOMATCH); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" + -+ addi(t0, cnt1, isL ? -32 : -16); -+ bgtz(t0, DO_LONG); -+ string_indexof_char_short(str1, cnt1, ch, result, isL); -+ j(DONE); ++// ---------------------------------------------------------------------------- + -+ bind(DO_LONG); -+ mv(orig_cnt, cnt1); -+ if (AvoidUnalignedAccesses) { -+ Label ALIGNED; -+ andi(unaligned_elems, str1, 0x7); -+ beqz(unaligned_elems, ALIGNED); -+ sub(unaligned_elems, unaligned_elems, 8); -+ neg(unaligned_elems, unaligned_elems); -+ if (!isL) { -+ srli(unaligned_elems, unaligned_elems, 1); -+ } -+ // do unaligned part per element -+ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); -+ bgez(result, DONE); -+ mv(orig_cnt, cnt1); -+ sub(cnt1, cnt1, unaligned_elems); -+ bind(ALIGNED); -+ } ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ precond(cbuf.stubs()->start() != badAddress); ++ precond(cbuf.stubs()->end() != badAddress); ++ // Stub is fixed up when the corresponding call is converted from ++ // calling compiled code to calling interpreted code. ++ // mv xmethod, 0 ++ // jalr -4 # to self + -+ // duplicate ch -+ if (isL) { -+ slli(ch1, ch, 8); -+ orr(ch, ch1, ch); ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // Get mark within main instrs section. + } -+ slli(ch1, ch, 16); -+ orr(ch, ch1, ch); -+ slli(ch1, ch, 32); -+ orr(ch, ch1, ch); + -+ if (!isL) { -+ slli(cnt1, cnt1, 1); -+ } ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. ++ MacroAssembler _masm(&cbuf); + -+ uint64_t mask0101 = UCONST64(0x0101010101010101); -+ uint64_t mask0001 = UCONST64(0x0001000100010001); -+ mv(mask1, isL ? mask0101 : mask0001); -+ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); -+ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); -+ mv(mask2, isL ? 
mask7f7f : mask7fff); ++ address base = __ start_a_stub(to_interp_stub_size()); ++ int offset = __ offset(); ++ if (base == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ // static stub relocation stores the instruction address of the call ++ __ relocate(static_stub_Relocation::spec(mark)); + -+ bind(CH1_LOOP); -+ ld(ch1, Address(str1)); -+ addi(str1, str1, 8); -+ addi(cnt1, cnt1, -8); -+ compute_match_mask(ch1, ch, match_mask, mask1, mask2); -+ bnez(match_mask, HIT); -+ bgtz(cnt1, CH1_LOOP); -+ j(NOMATCH); ++ __ emit_static_call_stub(); + -+ bind(HIT); -+ ctzc_bit(trailing_char, match_mask, isL, ch1, result); -+ srli(trailing_char, trailing_char, 3); -+ addi(cnt1, cnt1, 8); -+ ble(cnt1, trailing_char, NOMATCH); -+ // match case -+ if (!isL) { -+ srli(cnt1, cnt1, 1); -+ srli(trailing_char, trailing_char, 1); -+ } ++ assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big"); ++ __ end_a_stub(); ++ return base; ++} ++#undef __ + -+ sub(result, orig_cnt, cnt1); -+ add(result, result, trailing_char); -+ j(DONE); ++int CompiledStaticCall::to_interp_stub_size() { ++ // (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr ++ return 12 * NativeInstruction::instruction_size; ++} + -+ bind(NOMATCH); -+ mv(result, -1); ++int CompiledStaticCall::to_trampoline_stub_size() { ++ // Somewhat pessimistically, we count 4 instructions here (although ++ // there are only 3) because we sometimes emit an alignment nop. ++ // Trampoline stubs are always word aligned. ++ return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; ++} + -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof_char"); ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 4; // 3 in emit_to_interp_stub + 1 in emit_call +} + -+typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(false /* is_aot */); ++ guarantee(stub != NULL, "stub not found"); + -+// Search for needle in haystack and return index or -1 -+// x10: result -+// x11: haystack -+// x12: haystack_len -+// x13: needle -+// x14: needle_len -+void C2_MacroAssembler::string_indexof(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae) -+{ -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } + -+ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; ++ // Creation also verifies the object. 
++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); ++#ifndef PRODUCT ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); + -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register nlen_tmp = tmp1; // needle len tmp -+ Register hlen_tmp = tmp2; // haystack len tmp -+ Register result_tmp = tmp4; ++ // read the value once ++ volatile intptr_t data = method_holder->data(); ++ assert(data == 0 || data == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(data == 0 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); ++#endif ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry); ++ ICache::invalidate_range(stub, to_interp_stub_size()); ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} + -+ bool isLL = ae == StrIntrinsicNode::LL; ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); ++ method_holder->set_data(0); ++} + -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT + -+ BLOCK_COMMENT("string_indexof {"); ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ _call->verify_alignment(); + -+ // Note, inline_string_indexOf() generates checks: -+ // if (pattern.count > src.count) return -1; -+ // if (pattern.count == 0) return 0; ++ // Verify stub. ++ address stub = find_stub(false /* is_aot */); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + -+ // We have two strings, a source string in haystack, haystack_len and a pattern string -+ // in needle, needle_len. Find the first occurence of pattern in source or return -1. ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} + -+ // For larger pattern and source we use a simplified Boyer Moore algorithm. -+ // With a small pattern and source we use linear scan. ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp +new file mode 100644 +index 0000000000..05da242e35 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/copy_riscv.hpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. -+ sub(result_tmp, haystack_len, needle_len); -+ // needle_len < 8, use linear scan -+ sub(t0, needle_len, 8); -+ bltz(t0, LINEARSEARCH); -+ // needle_len >= 256, use linear scan -+ sub(t0, needle_len, 256); -+ bgez(t0, LINEARSTUB); -+ // needle_len >= haystack_len/4, use linear scan -+ srli(t0, haystack_len, 2); -+ bge(needle_len, t0, LINEARSTUB); ++#ifndef CPU_RISCV_COPY_RISCV_HPP ++#define CPU_RISCV_COPY_RISCV_HPP + -+ // Boyer-Moore-Horspool introduction: -+ // The Boyer Moore alogorithm is based on the description here:- -+ // -+ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm -+ // -+ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule -+ // and the 'Good Suffix' rule. -+ // -+ // These rules are essentially heuristics for how far we can shift the -+ // pattern along the search string. -+ // -+ // The implementation here uses the 'Bad Character' rule only because of the -+ // complexity of initialisation for the 'Good Suffix' rule. -+ // -+ // This is also known as the Boyer-Moore-Horspool algorithm: -+ // -+ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm -+ // -+ // #define ASIZE 256 -+ // -+ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { -+ // int i, j; -+ // unsigned c; -+ // unsigned char bc[ASIZE]; -+ // -+ // /* Preprocessing */ -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ // -+ // /* Searching */ -+ // j = 0; -+ // while (j <= n - m) { -+ // c = src[i+j]; -+ // if (pattern[m-1] == c) -+ // int k; -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // if (k < 0) return j; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 -+ // // LL case: (c< 256) always true. Remove branch -+ // j += bc[pattern[j+m-1]]; -+ // #endif -+ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF -+ // // UU case: need if (c if not. 
-+ // if (c < ASIZE) -+ // j += bc[pattern[j+m-1]]; -+ // else -+ // j += m -+ // #endif -+ // } -+ // return -1; -+ // } ++// Inline functions for memory copy and fill. + -+ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result -+ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, -+ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) + -+ Register haystack_end = haystack_len; -+ Register skipch = tmp2; ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} + -+ // pattern length is >=8, so, we can read at least 1 register for cases when -+ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for -+ // UL case. We'll re-read last character in inner pre-loop code to have -+ // single outer pre-loop load -+ const int firstStep = isLL ? 7 : 3; ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} + -+ const int ASIZE = 256; -+ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} + -+ sub(sp, sp, ASIZE); ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} + -+ // init BC offset table with default value: needle_len -+ slli(t0, needle_len, 8); -+ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] -+ slli(tmp1, t0, 16); -+ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] -+ slli(tmp1, t0, 32); -+ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} + -+ mv(ch1, sp); // ch1 is t0 -+ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations ++#endif // CPU_RISCV_COPY_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp +new file mode 100644 +index 0000000000..e9ff307b64 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ bind(BM_INIT_LOOP); -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ for (int i = 0; i < 4; i++) { -+ sd(tmp5, Address(ch1, i * wordSize)); -+ } -+ add(ch1, ch1, 32); -+ sub(tmp6, tmp6, 4); -+ bgtz(tmp6, BM_INIT_LOOP); ++#ifndef CPU_RISCV_VM_DEPCHECKER_RISCV_HPP ++#define CPU_RISCV_VM_DEPCHECKER_RISCV_HPP + -+ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern -+ Register orig_haystack = tmp5; -+ mv(orig_haystack, haystack); -+ // result_tmp = tmp4 -+ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); -+ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 -+ mv(tmp3, needle); ++// Nothing to do on riscv + -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ bind(BCLOOP); -+ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); -+ add(tmp3, tmp3, needle_chr_size); -+ if (!needle_isL) { -+ // ae == StrIntrinsicNode::UU -+ mv(tmp6, ASIZE); -+ bgeu(ch1, tmp6, BCSKIP); -+ } -+ add(tmp4, sp, ch1); -+ sb(ch2, Address(tmp4)); // store skip offset to BC offset table ++#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp +new file mode 100644 +index 0000000000..06bca5298c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ bind(BCSKIP); -+ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 -+ bgtz(ch2, BCLOOP); ++#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP ++#define CPU_RISCV_DISASSEMBLER_RISCV_HPP + -+ // tmp6: pattern end, address after needle -+ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); -+ if (needle_isL == haystack_isL) { -+ // load last 8 bytes (8LL/4UU symbols) -+ ld(tmp6, Address(tmp6, -wordSize)); -+ } else { -+ // UL: from UTF-16(source) search Latin1(pattern) -+ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) -+ // convert Latin1 to UTF. 
eg: 0x0000abcd -> 0x0a0b0c0d -+ // We'll have to wait until load completed, but it's still faster than per-character loads+checks -+ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a -+ slli(ch2, tmp6, XLEN - 24); -+ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b -+ slli(ch1, tmp6, XLEN - 16); -+ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c -+ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d -+ slli(ch2, ch2, 16); -+ orr(ch2, ch2, ch1); // 0x00000b0c -+ slli(result, tmp3, 48); // use result as temp register -+ orr(tmp6, tmp6, result); // 0x0a00000d -+ slli(result, ch2, 16); -+ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d -+ } ++static int pd_instruction_alignment() { ++ return 1; ++} + -+ // i = m - 1; -+ // skipch = j + i; -+ // if (skipch == pattern[m - 1] -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // else -+ // move j with bad char offset table -+ bind(BMLOOPSTR2); -+ // compare pattern to source string backward -+ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); -+ (this->*haystack_load_1chr)(skipch, Address(result), noreg); -+ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 -+ if (needle_isL == haystack_isL) { -+ // re-init tmp3. It's for free because it's executed in parallel with -+ // load above. Alternative is to initialize it before loop, but it'll -+ // affect performance on in-order systems with 2 or more ld/st pipelines -+ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] -+ } -+ if (!isLL) { // UU/UL case -+ slli(ch2, nlen_tmp, 1); // offsets in bytes -+ } -+ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char -+ add(result, haystack, isLL ? nlen_tmp : ch2); -+ ld(ch2, Address(result)); // load 8 bytes from source string -+ mv(ch1, tmp6); -+ if (isLL) { -+ j(BMLOOPSTR1_AFTER_LOAD); -+ } else { -+ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 -+ j(BMLOOPSTR1_CMP); -+ } ++static const char* pd_cpu_opts() { ++ return ""; ++} + -+ bind(BMLOOPSTR1); -+ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +new file mode 100644 +index 0000000000..d4fcbdcbbd +--- /dev/null ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -0,0 +1,694 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ bind(BMLOOPSTR1_AFTER_LOAD); -+ sub(nlen_tmp, nlen_tmp, 1); -+ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); ++#include "precompiled.hpp" ++#include "compiler/oopMap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/os.inline.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_riscv.inline.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" ++#include "runtime/vframeArray.hpp" ++#endif + -+ bind(BMLOOPSTR1_CMP); -+ beq(ch1, ch2, BMLOOPSTR1); ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif + -+ bind(BMSKIP); -+ if (!isLL) { -+ // if we've met UTF symbol while searching Latin1 pattern, then we can -+ // skip needle_len symbols -+ if (needle_isL != haystack_isL) { -+ mv(result_tmp, needle_len); -+ } else { -+ mv(result_tmp, 1); -+ } -+ mv(t0, ASIZE); -+ bgeu(skipch, t0, BMADV); -+ } -+ add(result_tmp, sp, skipch); -+ lbu(result_tmp, Address(result_tmp)); // load skip offset + -+ bind(BMADV); -+ sub(nlen_tmp, needle_len, 1); -+ // move haystack after bad char skip offset -+ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); -+ ble(haystack, haystack_end, BMLOOPSTR2); -+ add(sp, sp, ASIZE); -+ j(NOMATCH); ++// Profiling/safepoint support + -+ bind(BMLOOPSTR1_LASTCMP); -+ bne(ch1, ch2, BMSKIP); ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; + -+ bind(BMMATCH); -+ sub(result, haystack, orig_haystack); -+ if (!haystack_isL) { -+ srli(result, result, 1); ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? 
++ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { ++ return false; + } -+ add(sp, sp, ASIZE); -+ j(DONE); + -+ bind(LINEARSTUB); -+ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm -+ bltz(t0, LINEARSEARCH); -+ mv(result, zr); -+ RuntimeAddress stub = NULL; -+ if (isLL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); -+ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); -+ } else if (needle_isL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); -+ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); -+ } else { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); -+ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); ++ // When we are running interpreted code the machine stack pointer, SP, is ++ // set low enough so that the Java expression stack can grow and shrink ++ // without ever exceeding the machine stack bounds. So, ESP >= SP. ++ ++ // When we call out of an interpreted method, SP is incremented so that ++ // the space between SP and ESP is removed. The SP saved in the callee's ++ // frame is the SP *before* this increment. So, when we walk a stack of ++ // interpreter frames the sender's SP saved in a frame might be less than ++ // the SP at the point of call. ++ ++ // So unextended sp must be within the stack but we need not to check ++ // that unextended sp >= sp ++ ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()); ++ ++ if (!unextended_sp_safe) { ++ return false; + } -+ trampoline_call(stub); -+ j(DONE); + -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); + -+ bind(LINEARSEARCH); -+ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); ++ // We know sp/unextended_sp are safe only fp is questionable here + -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof"); -+} ++ // If the current frame is known to the code cache then we can attempt to ++ // to construct the sender and do some validation of it. 
This goes a long way ++ // toward eliminating issues when we get in frame construction code + -+// string_indexof -+// result: x10 -+// src: x11 -+// src_count: x12 -+// pattern: x13 -+// pattern_count: x14 or 1/2/3/4 -+void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae) -+{ -+ // Note: -+ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant -+ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 -+ assert(needle_con_cnt <= 4, "Invalid needle constant count"); -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ if (_cb != NULL) { + -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register hlen_neg = haystack_len, nlen_neg = needle_len; -+ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. + -+ bool isLL = ae == StrIntrinsicNode::LL; ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } + -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } + -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; -+ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. ++ return fp_safe && is_entry_frame_valid(thread); ++ } + -+ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; + -+ Register first = tmp3; ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } + -+ if (needle_con_cnt == -1) { -+ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; ++ sender_pc = (address)this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) 
+ -+ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); -+ bltz(t0, DOSHORT); ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } + -+ (this->*needle_load_1chr)(first, Address(needle), noreg); -+ slli(t0, needle_len, needle_chr_shift); -+ add(needle, needle, t0); -+ neg(nlen_neg, t0); -+ slli(t0, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, t0); -+ neg(hlen_neg, t0); ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? ++ if ((address)sender_sp >= thread->stack_base()) { ++ return false; ++ } + -+ bind(FIRST_LOOP); -+ add(t0, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); -+ beq(first, ch2, STR1_LOOP); ++ sender_unextended_sp = sender_sp; ++ sender_pc = (address) *(sender_sp - 1); ++ saved_fp = (intptr_t*) *(sender_sp - 2); ++ } + -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); + -+ bind(STR1_LOOP); -+ add(nlen_tmp, nlen_neg, needle_chr_size); -+ add(hlen_tmp, hlen_neg, haystack_chr_size); -+ bgez(nlen_tmp, MATCH); ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { + -+ bind(STR1_NEXT); -+ add(ch1, needle, nlen_tmp); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ add(nlen_tmp, nlen_tmp, needle_chr_size); -+ add(hlen_tmp, hlen_tmp, haystack_chr_size); -+ bltz(nlen_tmp, STR1_NEXT); -+ j(MATCH); ++ // fp is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp ++ // is really a frame pointer. 
+ -+ bind(DOSHORT); -+ if (needle_isL == haystack_isL) { -+ sub(t0, needle_len, 2); -+ bltz(t0, DO1); -+ bgtz(t0, DO3); ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); + } -+ } + -+ if (needle_con_cnt == 4) { -+ Label CH1_LOOP; -+ (this->*load_4chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, haystack_len, 4); -+ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } + -+ bind(CH1_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_4chr)(ch2, Address(ch2), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ } ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } + -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { -+ Label CH1_LOOP; -+ BLOCK_COMMENT("string_indexof DO2 {"); -+ bind(DO2); -+ (this->*load_2chr)(ch1, Address(needle), noreg); -+ if (needle_con_cnt == 2) { -+ sub(result_tmp, haystack_len, 2); ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; + } -+ slli(tmp3, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); + -+ bind(CH1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ BLOCK_COMMENT("} string_indexof DO2"); -+ } ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } + -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { -+ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; -+ BLOCK_COMMENT("string_indexof DO3 {"); ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + -+ bind(DO3); -+ (this->*load_2chr)(first, Address(needle), noreg); -+ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); -+ if (needle_con_cnt == 3) { -+ sub(result_tmp, haystack_len, 3); ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); ++ ++ return jcw_safe; + } -+ slli(hlen_tmp, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, hlen_tmp); -+ neg(hlen_neg, hlen_tmp); + -+ bind(FIRST_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(ch2), noreg); -+ beq(first, ch2, STR1_LOOP); ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || 
nm->is_deopt_entry(sender_pc) ||
++ nm->method()->is_method_handle_intrinsic()) {
++ return false;
++ }
++ }
+
++ // If the frame size is 0 (or less), something is bad because every nmethod has a non-zero frame size
++ // because the return address counts against the callee's frame.
++ if (sender_blob->frame_size() <= 0) {
++ assert(!sender_blob->is_compiled(), "should count return address at least");
++ return false;
++ }
+
++ // We should never be able to see anything here except an nmethod. If something in the
++ // code cache (current frame) is called by an entity within the code cache that entity
++ // should not be anything but the call stub (already covered), the interpreter (already covered)
++ // or an nmethod.
++ if (!sender_blob->is_compiled()) {
++ return false;
++ }
++
++ // Could put some more validation for the potential non-interpreted sender
++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte...
++
++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb
++
++ // We've validated the potential sender that would be created
++ return true;
+ }
+
++ // Must be native-compiled frame. Since sender will try and use fp to find
++ // linkages it must be safe
++ if (!fp_safe) {
++ return false;
++ }
+
++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame)
++ if ((address)this->fp()[return_addr_offset] == NULL) { return false; }
+
++ return true;
++}
++
++void frame::patch_pc(Thread* thread, address pc) {
++ address* pc_addr = &(((address*) sp())[-1]);
++ if (TracePcPatching) {
++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]",
++ p2i(pc_addr), p2i(*pc_addr), p2i(pc));
++ }
++ // Either the return address is the original one or we are going to
++ // patch in the same address that's already there.
++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; + } ++} + -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} + -+ bind(MATCH); -+ srai(t0, hlen_neg, haystack_chr_shift); -+ add(result, result_tmp, t0); ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} + -+ bind(DONE); ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; +} + -+// Compare strings. -+void C2_MacroAssembler::string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -+ Register tmp3, int ae) -+{ -+ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, -+ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, -+ SHORT_LOOP_START, TAIL_CHECK, L; ++// sender_sp ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} + -+ const int STUB_THRESHOLD = 64 + 8; -+ bool isLL = ae == StrIntrinsicNode::LL; -+ bool isLU = ae == StrIntrinsicNode::LU; -+ bool isUL = ae == StrIntrinsicNode::UL; ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} + -+ bool str1_isL = isLL || isLU; -+ bool str2_isL = isLL || isUL; + -+ // for L strings, 1 byte for 1 character -+ // for U strings, 2 bytes for 1 character -+ int str1_chr_size = str1_isL ? 1 : 2; -+ int str2_chr_size = str2_isL ? 1 : 2; -+ int minCharsInWord = isLL ? wordSize : wordSize / 2; ++// monitor elements + -+ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} + -+ BLOCK_COMMENT("string_compare {"); ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer"); ++ assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer"); ++ return result; ++} + -+ // Bizzarely, the counts are passed in bytes, regardless of whether they -+ // are L or U strings, however the result is always in characters. 
-+ if (!str1_isL) { -+ sraiw(cnt1, cnt1, 1); -+ } -+ if (!str2_isL) { -+ sraiw(cnt2, cnt2, 1); ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* last_sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = last_sp; ++} ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ // Since we are walking the stack now this nested anchor is obviously walkable ++ // even if it wasn't when it was stacked. ++ if (!jfa->walkable()) { ++ // Capture _last_Java_pc (if needed) and mark anchor walkable. ++ jfa->capture_last_Java_pc(); + } ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ vmassert(jfa->last_Java_pc() != NULL, "not walkable"); ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++} + -+ // Compute the minimum of the string lengths and save the difference in result. -+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; + -+ // A very short string -+ li(t0, minCharsInWord); -+ ble(cnt2, t0, SHORT_STRING); ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; + -+ // Compare longwords -+ // load first parts of strings and finish initialization while loading -+ { -+ if (str1_isL == str2_isL) { // LL or UU -+ // load 8 bytes once to compare -+ ld(tmp1, Address(str1)); -+ beq(str1, str2, DONE); -+ ld(tmp2, Address(str2)); -+ li(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ sub(cnt2, cnt2, minCharsInWord); -+ beqz(cnt2, TAIL_CHECK); -+ // convert cnt2 from characters to bytes -+ if (!str1_isL) { -+ slli(cnt2, cnt2, 1); ++ assert_cond(nm != NULL); ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains_inclusive(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On riscv, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. 
++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); + } -+ add(str2, str2, cnt2); -+ add(str1, str1, cnt2); -+ sub(cnt2, zr, cnt2); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ li(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ add(str1, str1, cnt2); -+ sub(cnt1, zr, cnt2); -+ slli(cnt2, cnt2, 1); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 4); -+ } else { // UL case -+ ld(tmp1, Address(str1)); -+ lwu(tmp2, Address(str2)); -+ li(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ slli(t0, cnt2, 1); -+ sub(cnt1, zr, t0); -+ add(str1, str1, t0); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 8); -+ } -+ addi(cnt2, cnt2, isUL ? 4 : 8); -+ bgez(cnt2, TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); -+ -+ // main loop -+ bind(NEXT_WORD); -+ if (str1_isL == str2_isL) { // LL or UU -+ add(t0, str1, cnt2); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt2, cnt2, 8); -+ } else if (isLU) { // LU case -+ add(t0, str1, cnt1); -+ lwu(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt1, cnt1, 4); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ addi(cnt2, cnt2, 8); -+ } else { // UL case -+ add(t0, str2, cnt2); -+ lwu(tmp2, Address(t0)); -+ add(t0, str1, cnt1); -+ ld(tmp1, Address(t0)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ addi(cnt1, cnt1, 8); -+ addi(cnt2, cnt2, 4); -+ } -+ bgez(cnt2, TAIL); -+ -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, NEXT_WORD); -+ j(DIFFERENCE); -+ bind(TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ if (str1_isL == str2_isL) { // LL or UU -+ ld(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ } else { // UL case -+ lwu(tmp2, Address(str2)); -+ ld(tmp1, Address(str1)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ } -+ bind(TAIL_CHECK); -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, DONE); -+ -+ // Find the first different characters in the longwords and -+ // compute their difference. 
-+ bind(DIFFERENCE); -+ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb -+ srl(tmp1, tmp1, result); -+ srl(tmp2, tmp2, result); -+ if (isLL) { -+ andi(tmp1, tmp1, 0xFF); -+ andi(tmp2, tmp2, 0xFF); -+ } else { -+ andi(tmp1, tmp1, 0xFFFF); -+ andi(tmp2, tmp2, 0xFFFF); + } -+ sub(result, tmp1, tmp2); -+ j(DONE); -+ } -+ -+ bind(STUB); -+ RuntimeAddress stub = NULL; -+ switch (ae) { -+ case StrIntrinsicNode::LL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); -+ break; -+ case StrIntrinsicNode::UU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); -+ break; -+ case StrIntrinsicNode::LU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); -+ break; -+ case StrIntrinsicNode::UL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); -+ break; -+ default: -+ ShouldNotReachHere(); + } -+ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); -+ trampoline_call(stub); -+ j(DONE); -+ -+ bind(SHORT_STRING); -+ // Is the minimum length zero? -+ beqz(cnt2, DONE); -+ // arrange code to do most branches while loading and loading next characters -+ // while comparing previous -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ j(SHORT_LOOP_START); -+ bind(SHORT_LOOP); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST); -+ bind(SHORT_LOOP_START); -+ (this->*str1_load_chr)(tmp2, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(t0, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bne(tmp1, cnt1, SHORT_LOOP_TAIL); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST2); -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ beq(tmp2, t0, SHORT_LOOP); -+ sub(result, tmp2, t0); -+ j(DONE); -+ bind(SHORT_LOOP_TAIL); -+ sub(result, tmp1, cnt1); -+ j(DONE); -+ bind(SHORT_LAST2); -+ beq(tmp2, t0, DONE); -+ sub(result, tmp2, t0); -+ -+ j(DONE); -+ bind(SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bind(SHORT_LAST); -+ beq(tmp1, cnt1, DONE); -+ sub(result, tmp1, cnt1); ++} + -+ bind(DONE); ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. + -+ BLOCK_COMMENT("} string_compare"); ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ assert(map != NULL, "map must be set"); ++ map->set_location(::fp->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. 
++ map->set_location(::fp->as_VMReg()->next(), (address) link_addr); +} + -+void C2_MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, -+ Register tmp4, Register tmp5, Register tmp6, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; // cnt2 only used in array length compare -+ Register elem_per_word = tmp6; -+ int log_elem_size = exact_log2(elem_size); -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); + -+ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); -+ li(elem_per_word, wordSize / elem_size); ++//------------------------------------------------------------------------------ ++// frame::sender_for_interpreter_frame ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // SP is the raw SP from the sender after adapter or interpreter ++ // extension. ++ intptr_t* sender_sp = this->sender_sp(); + -+ BLOCK_COMMENT("arrays_equals {"); ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); + -+ // if (a1 == a2), return true -+ beq(a1, a2, SAME); ++#ifdef COMPILER2 ++ assert(map != NULL, "map must be set"); ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ } ++#endif // COMPILER2 + -+ mv(result, false); -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt2, cnt1, DONE); -+ beqz(cnt1, SAME); ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} + -+ slli(tmp5, cnt1, 3 + log_elem_size); -+ sub(tmp5, zr, tmp5); -+ add(a1, a1, base_offset); -+ add(a2, a2, base_offset); -+ ld(tmp3, Address(a1, 0)); -+ ld(tmp4, Address(a2, 0)); -+ ble(cnt1, elem_per_word, SHORT); // short or same + -+ // Main 16 byte comparison loop with 2 exits -+ bind(NEXT_DWORD); { -+ ld(tmp1, Address(a1, wordSize)); -+ ld(tmp2, Address(a2, wordSize)); -+ sub(cnt1, cnt1, 2 * wordSize / elem_size); -+ blez(cnt1, TAIL); -+ bne(tmp3, tmp4, DONE); -+ ld(tmp3, Address(a1, 2 * wordSize)); -+ ld(tmp4, Address(a2, 2 * wordSize)); -+ add(a1, a1, 2 * wordSize); -+ add(a2, a2, 2 * wordSize); -+ ble(cnt1, elem_per_word, TAIL2); -+ } beq(tmp1, tmp2, NEXT_DWORD); -+ j(DONE); ++//------------------------------------------------------------------------------ ++// frame::sender_for_compiled_frame ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ // we cannot rely upon the last fp having been saved to the thread ++ // in C2 code but it will have been pushed onto the stack. 
So we
++ // have to find it relative to the unextended sp
+
++ assert(_cb->frame_size() >= 0, "must have non-zero frame size");
++ intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size();
++ intptr_t* unextended_sp = l_sender_sp;
+
++ // the return_address is always the word on the stack
++ address sender_pc = (address) *(l_sender_sp + frame::return_addr_offset);
+
++ intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp + frame::link_offset);
+
++ assert(map != NULL, "map must be set");
++ if (map->update_map()) {
++ // Tell GC to use argument oopmaps for some runtime stubs that need it.
++ // For C1, the runtime stub might not have oop maps, so set this flag
++ // outside of update_register_map.
++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
++ if (_cb->oop_maps() != NULL) {
++ OopMapSet::update_register_map(this, map);
++ }
+
++ // Since the prolog does the save and restore of FP there is no
++ // oopmap for it so we must fill in its location as if there was
++ // an oopmap entry since if our caller was compiled code there
++ // could be live jvm state in it.
++ update_map_with_saved_link(map, saved_fp_addr);
++ }
+
++ return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc);
+}
+
++//------------------------------------------------------------------------------
++// frame::sender
++frame frame::sender(RegisterMap* map) const {
++ // Default is we don't have to follow them. The sender_for_xxx will
++ // update it accordingly
++ assert(map != NULL, "map must be set");
++ map->set_include_argument_oops(false);
+
++ if (is_entry_frame()) {
++ return sender_for_entry_frame(map);
++ }
++ if (is_interpreted_frame()) {
++ return sender_for_interpreter_frame(map);
++ }
++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
+
++ // This test looks odd: why is it not is_compiled_frame() ? That's
++ // because stubs also have OOP maps.
++ if (_cb != NULL) {
++ return sender_for_compiled_frame(map);
++ }
+
++ // Must be native-compiled frame, i.e. the marshaling code for native
++ // methods that exists in the core system.
++ return frame(sender_sp(), link(), sender_pc()); ++} + -+ BLOCK_COMMENT("string_equals {"); ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == NULL || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == NULL || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } + -+ mv(result, false); ++ // do some validation of frame elements + -+ // Check for short strings, i.e. smaller than wordSize. -+ sub(cnt1, cnt1, wordSize); -+ bltz(cnt1, SHORT); ++ // first the method ++ Method* m = *interpreter_frame_method_addr(); ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) { ++ return false; ++ } + -+ // Main 8 byte comparison loop. -+ bind(NEXT_WORD); { -+ ld(tmp1, Address(a1, 0)); -+ add(a1, a1, wordSize); -+ ld(tmp2, Address(a2, 0)); -+ add(a2, a2, wordSize); -+ sub(cnt1, cnt1, wordSize); -+ bne(tmp1, tmp2, DONE); -+ } bgtz(cnt1, NEXT_WORD); ++ // stack frames shouldn't be much larger than max_stack elements ++ // this test requires the use of unextended_sp which is the sp as seen by ++ // the current frame, and not sp which is the "raw" pc which could point ++ // further because of local variables of the callee method inserted after ++ // method arguments ++ if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { ++ return false; ++ } + -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when -+ // length == 4. -+ add(tmp1, a1, cnt1); -+ ld(tmp1, Address(tmp1, 0)); -+ add(tmp2, a2, cnt1); -+ ld(tmp2, Address(tmp2, 0)); -+ bne(tmp1, tmp2, DONE); -+ j(SAME); ++ // validate bci/bcx ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } + -+ bind(SHORT); -+ Label TAIL03, TAIL01; ++ // validate constantPoolCache* ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ if (MetaspaceObj::is_valid(cp) == false) { ++ return false; ++ } + -+ // 0-7 bytes left. -+ andi(t0, cnt1, 4); -+ beqz(t0, TAIL03); -+ { -+ lwu(tmp1, Address(a1, 0)); -+ add(a1, a1, 4); -+ lwu(tmp2, Address(a2, 0)); -+ add(a2, a2, 4); -+ bne(tmp1, tmp2, DONE); ++ // validate locals ++ address locals = (address) *interpreter_frame_locals_addr(); ++ if (locals > thread->stack_base()) { ++ return false; + } + -+ bind(TAIL03); -+ // 0-3 bytes left. -+ andi(t0, cnt1, 2); -+ beqz(t0, TAIL01); -+ { -+ lhu(tmp1, Address(a1, 0)); -+ add(a1, a1, 2); -+ lhu(tmp2, Address(a2, 0)); -+ add(a2, a2, 2); -+ bne(tmp1, tmp2, DONE); ++ if (m->max_locals() > 0 && locals < (address) fp()) { ++ // fp in interpreter frame on RISC-V is higher than that on AArch64, ++ // pointing to sender_sp and sender_sp-2 relatively. ++ // On RISC-V, if max_locals is 0, the 'locals' pointer may be below fp, ++ // pointing to sender_sp-1 (with one padding slot). ++ // So we verify the 'locals' pointer only if max_locals > 0. 
++ return false;
++ }
+
++ // We'd have to be pretty unlucky to be misled at this point
++ return true;
++}
++
++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) {
++ assert(is_interpreted_frame(), "interpreted frame expected");
++ Method* method = interpreter_frame_method();
++ BasicType type = method->result_type();
++
++ intptr_t* tos_addr = NULL;
++ if (method->is_native()) {
++ tos_addr = (intptr_t*)sp();
++ if (type == T_FLOAT || type == T_DOUBLE) {
++ // This is because we do a push(ltos) after push(dtos) in generate_native_entry.
++ tos_addr += 2 * Interpreter::stackElementWords;
++ }
++ } else {
++ tos_addr = (intptr_t*)interpreter_frame_tos_address();
++ }
++
++ switch (type) {
++ case T_OBJECT :
++ case T_ARRAY : {
++ oop obj;
++ if (method->is_native()) {
++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset));
++ } else {
++ oop* obj_p = (oop*)tos_addr;
++ obj = (obj_p == NULL) ? (oop)NULL : *obj_p;
++ }
++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check");
++ *oop_result = obj;
++ break;
++ }
++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break;
++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break;
++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break;
++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break;
++ case T_INT : value_result->i = *(jint*)tos_addr; break;
++ case T_LONG : value_result->j = *(jlong*)tos_addr; break;
++ case T_FLOAT : {
++ value_result->f = *(jfloat*)tos_addr;
++ break;
++ }
++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break;
++ case T_VOID : /* Nothing to do */ break;
++ default : ShouldNotReachHere();
++ }
+
-+ bind(DONE); -+ BLOCK_COMMENT("} string_equals"); ++ return type; +} + -+typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); -+typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, -+ bool is_far, bool is_unordered); + -+static conditional_branch_insn conditional_branches[] = -+{ -+ /* SHORT branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgt, -+ NULL, // BoolTest::overflow -+ (conditional_branch_insn)&Assembler::blt, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::ble, -+ NULL, // BoolTest::no_overflow -+ (conditional_branch_insn)&Assembler::bge, ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} + -+ /* UNSIGNED branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgtu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bltu, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::bleu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bgeu -+}; ++#ifndef PRODUCT + -+static float_conditional_branch_insn float_conditional_branches[] = -+{ -+ /* FLOAT SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::float_beq, -+ (float_conditional_branch_insn)&MacroAssembler::float_bgt, -+ NULL, // BoolTest::overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_blt, -+ (float_conditional_branch_insn)&MacroAssembler::float_bne, -+ (float_conditional_branch_insn)&MacroAssembler::float_ble, -+ NULL, // BoolTest::no_overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_bge, -+ -+ /* DOUBLE SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::double_beq, -+ (float_conditional_branch_insn)&MacroAssembler::double_bgt, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_blt, -+ (float_conditional_branch_insn)&MacroAssembler::double_bne, -+ (float_conditional_branch_insn)&MacroAssembler::double_ble, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_bge -+}; -+ -+void C2_MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), -+ "invalid conditional branch index"); -+ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); -+} -+ -+// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use -+// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). -+void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), -+ "invalid float conditional branch index"); -+ int booltest_flag = cmpFlag & ~(C2_MacroAssembler::double_branch_mask); -+ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, -+ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? 
false : true); -+} -+ -+void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ case BoolTest::le: -+ beqz(op1, L, is_far); -+ break; -+ case BoolTest::ne: -+ case BoolTest::gt: -+ bnez(op1, L, is_far); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ beqz(op1, L, is_far); -+ break; -+ case BoolTest::ne: -+ bnez(op1, L, is_far); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { -+ Label L; -+ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); -+ mv(dst, src); -+ bind(L); -+} -+ -+// Set dst to NaN if any NaN input. -+void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, -+ bool is_double, bool is_min) { -+ assert_different_registers(dst, src1, src2); -+ -+ Label Done; -+ fsflags(zr); -+ if (is_double) { -+ is_min ? fmin_d(dst, src1, src2) -+ : fmax_d(dst, src1, src2); -+ // Checking NaNs -+ flt_d(zr, src1, src2); -+ } else { -+ is_min ? fmin_s(dst, src1, src2) -+ : fmax_s(dst, src1, src2); -+ // Checking NaNs -+ flt_s(zr, src1, src2); -+ } -+ -+ frflags(t0); -+ beqz(t0, Done); -+ -+ // In case of NaNs -+ is_double ? fadd_d(dst, src1, src2) -+ : fadd_s(dst, src1, src2); -+ -+ bind(Done); -+} -+ -+void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, -+ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { -+ Label loop; -+ Assembler::SEW sew = islatin ? Assembler::e8 : Assembler::e16; -+ -+ bind(loop); -+ vsetvli(tmp1, cnt, sew, Assembler::m2); -+ vlex_v(vr1, a1, sew); -+ vlex_v(vr2, a2, sew); -+ vmsne_vv(vrs, vr1, vr2); -+ vfirst_m(tmp2, vrs); -+ bgez(tmp2, DONE); -+ sub(cnt, cnt, tmp1); -+ if (!islatin) { -+ slli(tmp1, tmp1, 1); // get byte counts -+ } -+ add(a1, a1, tmp1); -+ add(a2, a2, tmp1); -+ bnez(cnt, loop); -+ -+ mv(result, true); -+} -+ -+void C2_MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { -+ Label DONE; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ -+ BLOCK_COMMENT("string_equals_v {"); -+ -+ mv(result, false); ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) + -+ if (elem_size == 2) { -+ srli(cnt, cnt, 1); ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); + } -+ -+ element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); -+ -+ bind(DONE); -+ BLOCK_COMMENT("} string_equals_v"); -+} -+ -+// used by C2 ClearArray patterns. -+// base: Address of a buffer to be zeroed -+// cnt: Count in HeapWords -+// -+// base, cnt, v0, v1 and t0 are clobbered. 
-+void C2_MacroAssembler::clear_array_v(Register base, Register cnt) { -+ Label loop; -+ -+ // making zero words -+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -+ vxor_vv(v0, v0, v0); -+ -+ bind(loop); -+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -+ vse64_v(v0, base); -+ sub(cnt, cnt, t0); -+ shadd(base, t0, base, t0, 3); -+ bnez(cnt, loop); +} ++#endif + -+void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); -+ -+ BLOCK_COMMENT("arrays_equals_v {"); -+ -+ // if (a1 == a2), return true -+ mv(result, true); -+ beq(a1, a2, DONE); -+ -+ mv(result, false); -+ // if a1 == null or a2 == null, return false -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ // if (a1.length != a2.length), return false -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt1, cnt2, DONE); -+ -+ la(a1, Address(a1, base_offset)); -+ la(a2, Address(a2, base_offset)); -+ -+ element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); -+ -+ bind(DONE); -+ -+ BLOCK_COMMENT("} arrays_equals_v"); ++intptr_t *frame::initial_deoptimization_info() { ++ // Not used on riscv, but we must return something. ++ return NULL; +} + -+void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, -+ Register result, Register tmp1, Register tmp2, int encForm) { -+ Label DIFFERENCE, DONE, L, loop; -+ bool encLL = encForm == StrIntrinsicNode::LL; -+ bool encLU = encForm == StrIntrinsicNode::LU; -+ bool encUL = encForm == StrIntrinsicNode::UL; -+ -+ bool str1_isL = encLL || encLU; -+ bool str2_isL = encLL || encUL; -+ -+ int minCharsInWord = encLL ? wordSize : wordSize / 2; -+ -+ BLOCK_COMMENT("string_compare {"); -+ -+ // for Lating strings, 1 byte for 1 character -+ // for UTF16 strings, 2 bytes for 1 character -+ if (!str1_isL) -+ sraiw(cnt1, cnt1, 1); -+ if (!str2_isL) -+ sraiw(cnt2, cnt2, 1); -+ -+ // if str1 == str2, return the difference -+ // save the minimum of the string lengths in cnt2. -+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); -+ -+ if (str1_isL == str2_isL) { // LL or UU -+ element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); -+ j(DONE); -+ } else { // LU or UL -+ Register strL = encLU ? str1 : str2; -+ Register strU = encLU ? str2 : str1; -+ VectorRegister vstr1 = encLU ? v4 : v0; -+ VectorRegister vstr2 = encLU ? v0 : v4; -+ -+ bind(loop); -+ vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); -+ vle8_v(vstr1, strL); -+ vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); -+ vzext_vf2(vstr2, vstr1); -+ vle16_v(vstr1, strU); -+ vmsne_vv(v0, vstr2, vstr1); -+ vfirst_m(tmp2, v0); -+ bgez(tmp2, DIFFERENCE); -+ sub(cnt2, cnt2, tmp1); -+ add(strL, strL, tmp1); -+ shadd(strU, tmp1, strU, tmp1, 1); -+ bnez(cnt2, loop); -+ j(DONE); ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } + } -+ bind(DIFFERENCE); -+ slli(tmp1, tmp2, 1); -+ add(str1, str1, str1_isL ? tmp2 : tmp1); -+ add(str2, str2, str2_isL ? tmp2 : tmp1); -+ str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); -+ str2_isL ? 
lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); -+ sub(result, tmp1, tmp2); -+ -+ bind(DONE); -+} -+ -+void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { -+ Label loop; -+ assert_different_registers(src, dst, len, tmp, t0); -+ -+ BLOCK_COMMENT("byte_array_inflate_v {"); -+ bind(loop); -+ vsetvli(tmp, len, Assembler::e8, Assembler::m2); -+ vle8_v(v2, src); -+ vsetvli(t0, len, Assembler::e16, Assembler::m4); -+ vzext_vf2(v0, v2); -+ vse16_v(v0, dst); -+ sub(len, len, tmp); -+ add(src, src, tmp); -+ shadd(dst, tmp, dst, tmp, 1); -+ bnez(len, loop); -+ BLOCK_COMMENT("} byte_array_inflate_v"); -+} -+ -+// Compress char[] array to byte[]. -+// result: the array length if every element in array can be encoded; 0, otherwise. -+void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { -+ Label done; -+ encode_iso_array_v(src, dst, len, result, tmp); -+ beqz(len, done); -+ mv(result, zr); -+ bind(done); -+} -+ -+// result: the number of elements had been encoded. -+void C2_MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { -+ Label loop, DIFFERENCE, DONE; -+ -+ BLOCK_COMMENT("encode_iso_array_v {"); -+ mv(result, 0); -+ -+ bind(loop); -+ mv(tmp, 0xff); -+ vsetvli(t0, len, Assembler::e16, Assembler::m2); -+ vle16_v(v2, src); -+ // if element > 0xff, stop -+ vmsgtu_vx(v1, v2, tmp); -+ vfirst_m(tmp, v1); -+ vmsbf_m(v0, v1); -+ // compress char to byte -+ vsetvli(t0, len, Assembler::e8); -+ vncvt_x_x_w(v1, v2, Assembler::v0_t); -+ vse8_v(v1, dst, Assembler::v0_t); -+ -+ bgez(tmp, DIFFERENCE); -+ add(result, result, t0); -+ add(dst, dst, t0); -+ sub(len, len, t0); -+ shadd(src, t0, src, t0, 1); -+ bnez(len, loop); -+ j(DONE); -+ -+ bind(DIFFERENCE); -+ add(result, result, tmp); -+ -+ bind(DONE); -+ BLOCK_COMMENT("} encode_iso_array_v"); -+} -+ -+void C2_MacroAssembler::count_positives_v(Register ary, Register len, Register result, Register tmp) { -+ Label LOOP, SET_RESULT, DONE; -+ -+ BLOCK_COMMENT("count_positives_v {"); -+ mv(result, zr); -+ -+ bind(LOOP); -+ vsetvli(t0, len, Assembler::e8, Assembler::m4); -+ vle8_v(v0, ary); -+ vmslt_vx(v0, v0, zr); -+ vfirst_m(tmp, v0); -+ bgez(tmp, SET_RESULT); -+ // if tmp == -1, all bytes are positive -+ add(result, result, t0); -+ -+ sub(len, len, t0); -+ add(ary, ary, t0); -+ bnez(len, LOOP); -+ j(DONE); -+ -+ // add remaining positive bytes count -+ bind(SET_RESULT); -+ add(result, result, tmp); -+ -+ bind(DONE); -+ BLOCK_COMMENT("} count_positives_v"); ++ // else rely on fp() ++ assert(!is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); +} + -+void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ bool isL) { -+ mv(result, zr); -+ -+ Label loop, MATCH, DONE; -+ Assembler::SEW sew = isL ? Assembler::e8 : Assembler::e16; -+ bind(loop); -+ vsetvli(tmp1, cnt1, sew, Assembler::m4); -+ vlex_v(v0, str1, sew); -+ vmseq_vx(v0, v0, ch); -+ vfirst_m(tmp2, v0); -+ bgez(tmp2, MATCH); // if equal, return index -+ -+ add(result, result, tmp1); -+ sub(cnt1, cnt1, tmp1); -+ if (!isL) slli(tmp1, tmp1, 1); -+ add(str1, str1, tmp1); -+ bnez(cnt1, loop); -+ -+ mv(result, -1); -+ j(DONE); -+ -+ bind(MATCH); -+ add(result, result, tmp2); ++#undef DESCRIBE_FP_OFFSET + -+ bind(DONE); ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. 
++frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { ++ init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); +} + -+// Set dst to NaN if any NaN input. -+void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, -+ bool is_double, bool is_min) { -+ assert_different_registers(dst, src1, src2); -+ -+ vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); -+ -+ is_min ? vfmin_vv(dst, src1, src2) -+ : vfmax_vv(dst, src1, src2); ++void frame::pd_ps() {} ++#endif + -+ vmfne_vv(v0, src1, src1); -+ vfadd_vv(dst, src1, src1, Assembler::v0_t); -+ vmfne_vv(v0, src2, src2); -+ vfadd_vv(dst, src2, src2, Assembler::v0_t); ++void JavaFrameAnchor::make_walkable(JavaThread* thread) { ++ // last frame set? ++ if (last_Java_sp() == NULL) { return; } ++ // already walkable? ++ if (walkable()) { return; } ++ vmassert(Thread::current() == (Thread*)thread, "not current thread"); ++ vmassert(last_Java_sp() != NULL, "not called from Java code?"); ++ vmassert(last_Java_pc() == NULL, "already walkable"); ++ capture_last_Java_pc(); ++ vmassert(walkable(), "something went wrong"); +} + -+// Set dst to NaN if any NaN input. -+void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst, -+ FloatRegister src1, VectorRegister src2, -+ VectorRegister tmp1, VectorRegister tmp2, -+ bool is_double, bool is_min) { -+ assert_different_registers(src2, tmp1, tmp2); -+ -+ Label L_done, L_NaN; -+ vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); -+ vfmv_s_f(tmp2, src1); -+ -+ is_min ? vfredmin_vs(tmp1, src2, tmp2) -+ : vfredmax_vs(tmp1, src2, tmp2); -+ -+ fsflags(zr); -+ // Checking NaNs -+ vmflt_vf(tmp2, src2, src1); -+ frflags(t0); -+ bnez(t0, L_NaN); -+ j(L_done); -+ -+ bind(L_NaN); -+ vfmv_s_f(tmp2, src1); -+ vfredsum_vs(tmp1, src2, tmp2); -+ -+ bind(L_done); -+ vfmv_f_s(dst, tmp1); ++void JavaFrameAnchor::capture_last_Java_pc() { ++ vmassert(_last_Java_sp != NULL, "no last frame set"); ++ vmassert(_last_Java_pc == NULL, "already walkable"); ++ _last_Java_pc = (address)_last_Java_sp[-1]; +} -diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp new file mode 100644 -index 00000000000..c71df4c101b +index 0000000000..18e021dcb9 --- /dev/null -+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp -@@ -0,0 +1,193 @@ ++++ b/src/hotspot/cpu/riscv/frame_riscv.hpp +@@ -0,0 +1,199 @@ +/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ *
@@ -13371,182 +12650,189 @@ index 00000000000..c71df4c101b
+ *
+ */
+
-+#ifndef CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP
-+#define CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP
-+
-+// C2_MacroAssembler contains high-level macros for C2
-+
-+ private:
-+ void element_compare(Register r1, Register r2,
-+ Register result, Register cnt,
-+ Register tmp1, Register tmp2,
-+ VectorRegister vr1, VectorRegister vr2,
-+ VectorRegister vrs,
-+ bool is_latin, Label& DONE);
-+ public:
++#ifndef CPU_RISCV_FRAME_RISCV_HPP
++#define CPU_RISCV_FRAME_RISCV_HPP
+
-+ void string_compare(Register str1, Register str2,
-+ Register cnt1, Register cnt2, Register result,
-+ Register tmp1, Register tmp2, Register tmp3,
-+ int ae);
++#include "runtime/synchronizer.hpp"
+
-+ void string_indexof_char_short(Register str1, Register cnt1,
-+ Register ch, Register result,
-+ bool isL);
++// A frame represents a physical stack frame (an activation). Frames can be
++// C or Java frames, and the Java frames can be interpreted or compiled.
++// In contrast, vframes represent source-level activations, so that one physical frame
++// can correspond to multiple source level frames because of inlining.
++// A frame is comprised of {pc, fp, sp}
++// ------------------------------ Asm interpreter ----------------------------------------
++// Layout of asm interpreter frame:
++// [expression stack ] * <- sp
+
-+ void string_indexof_char(Register str1, Register cnt1,
-+ Register ch, Register result,
-+ Register tmp1, Register tmp2,
-+ Register tmp3, Register tmp4,
-+ bool isL);
++// [monitors[0] ] \
++// ... | monitor block size = k
++// [monitors[k-1] ] /
++// [frame initial esp ] ( == &monitors[0], initially here) initial_sp_offset
++// [byte code index/pointer] = bcx() bcx_offset
+
-+ void string_indexof(Register str1, Register str2,
-+ Register cnt1, Register cnt2,
-+ Register tmp1, Register tmp2,
-+ Register tmp3, Register tmp4,
-+ Register tmp5, Register tmp6,
-+ Register result, int ae);
++// [pointer to locals ] = locals() locals_offset
++// [constant pool cache ] = cache() cache_offset
+
-+ void string_indexof_linearscan(Register haystack, Register needle,
-+ Register haystack_len, Register needle_len,
-+ Register tmp1, Register tmp2,
-+ Register tmp3, Register tmp4,
-+ int needle_con_cnt, Register result, int ae);
++// [klass of method ] = mirror() mirror_offset
++// [padding ]
+
-+ void arrays_equals(Register r1, Register r2,
-+ Register tmp3, Register tmp4,
-+ Register tmp5, Register tmp6,
-+ Register result, Register cnt1,
-+ int elem_size);
++// [methodData ] = mdp() mdx_offset
++// [Method ] = method() method_offset
+
-+ void string_equals(Register r1, Register r2,
-+ Register result, Register cnt1,
-+ int elem_size);
++// [last esp ] = last_sp() last_sp_offset
++// [old stack pointer ] (sender_sp) sender_sp_offset
+
-+ // refer to conditional_branches and float_conditional_branches
-+ static const int bool_test_bits = 3;
-+ static const int neg_cond_bits = 2;
-+ static const int unsigned_branch_mask = 1 << bool_test_bits;
-+ static const int double_branch_mask = 1 << bool_test_bits;
++// [old frame pointer ]
++// [return pc ]
+
-+ // cmp
-+ void cmp_branch(int cmpFlag,
-+ Register op1, Register op2,
-+ Label& label, bool is_far = false);
++// [last sp ] <- fp = link()
++// [oop temp ] (only for native calls)
+
-+ void float_cmp_branch(int cmpFlag,
-+ FloatRegister op1, FloatRegister op2,
-+ Label& label, bool is_far = false);
++// [padding ] (to preserve machine SP alignment)
++// [locals and parameters ]
++// <- sender sp
++// ------------------------------ Asm interpreter ---------------------------------------- + -+ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, -+ Label& L, bool is_far = false); ++// ------------------------------ C Frame ------------------------------------------------ ++// Stack: gcc with -fno-omit-frame-pointer ++// . ++// . ++// +-> . ++// | +-----------------+ | ++// | | return address | | ++// | | previous fp ------+ ++// | | saved registers | ++// | | local variables | ++// | | ... | <-+ ++// | +-----------------+ | ++// | | return address | | ++// +------ previous fp | | ++// | saved registers | | ++// | local variables | | ++// +-> | ... | | ++// | +-----------------+ | ++// | | return address | | ++// | | previous fp ------+ ++// | | saved registers | ++// | | local variables | ++// | | ... | <-+ ++// | +-----------------+ | ++// | | return address | | ++// +------ previous fp | | ++// | saved registers | | ++// | local variables | | ++// $fp --> | ... | | ++// +-----------------+ | ++// | return address | | ++// | previous fp ------+ ++// | saved registers | ++// $sp --> | local variables | ++// +-----------------+ ++// ------------------------------ C Frame ------------------------------------------------ + -+ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, -+ Label& L, bool is_far = false); ++ public: ++ enum { ++ pc_return_offset = 0, ++ // All frames ++ link_offset = -2, ++ return_addr_offset = -1, ++ sender_sp_offset = 0, ++ // Interpreter frames ++ interpreter_frame_oop_temp_offset = 1, // for native calls only + -+ void enc_cmove(int cmpFlag, -+ Register op1, Register op2, -+ Register dst, Register src); ++ interpreter_frame_sender_sp_offset = -3, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, + -+ void spill(Register r, bool is64, int offset) { -+ is64 ? sd(r, Address(sp, offset)) -+ : sw(r, Address(sp, offset)); -+ } ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, + -+ void spill(FloatRegister f, bool is64, int offset) { -+ is64 ? fsd(f, Address(sp, offset)) -+ : fsw(f, Address(sp, offset)); -+ } ++ // Entry frames ++ // n.b. these values are determined by the layout defined in ++ // stubGenerator for the Java call stub ++ entry_frame_after_call_words = 34, ++ entry_frame_call_wrapper_offset = -10, + -+ void spill(VectorRegister v, int offset) { -+ add(t0, sp, offset); -+ vs1r_v(v, t0); -+ } ++ // we don't need a save area ++ arg_reg_save_area_bytes = 0 ++ }; + -+ void unspill(Register r, bool is64, int offset) { -+ is64 ? ld(r, Address(sp, offset)) -+ : lw(r, Address(sp, offset)); ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); + } + -+ void unspillu(Register r, bool is64, int offset) { -+ is64 ? 
ld(r, Address(sp, offset)) -+ : lwu(r, Address(sp, offset)); ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; + } + -+ void unspill(FloatRegister f, bool is64, int offset) { -+ is64 ? fld(f, Address(sp, offset)) -+ : flw(f, Address(sp, offset)); -+ } ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. + -+ void unspill(VectorRegister v, int offset) { -+ add(t0, sp, offset); -+ vl1r_v(v, t0); -+ } ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); + -+ void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vec_reg_size_in_bytes) { -+ assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); -+ unspill(v0, src_offset); -+ spill(v0, dst_offset); ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); + } + -+ void minmax_FD(FloatRegister dst, -+ FloatRegister src1, FloatRegister src2, -+ bool is_double, bool is_min); ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp); ++#endif + -+ // intrinsic methods implemented by rvv instructions -+ void string_equals_v(Register r1, Register r2, -+ Register result, Register cnt1, -+ int elem_size); ++ public: ++ // Constructors + -+ void arrays_equals_v(Register r1, Register r2, -+ Register result, Register cnt1, -+ int elem_size); ++ frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); + -+ void string_compare_v(Register str1, Register str2, -+ Register cnt1, Register cnt2, -+ Register result, -+ Register tmp1, Register tmp2, -+ int encForm); ++ frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc); + -+ void clear_array_v(Register base, Register cnt); ++ frame(intptr_t* ptr_sp, intptr_t* ptr_fp); + -+ void byte_array_inflate_v(Register src, Register dst, -+ Register len, Register tmp); ++ void init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); + -+ void char_array_compress_v(Register src, Register dst, -+ Register len, Register result, -+ Register tmp); ++ // accessors for the instance variables ++ // Note: not necessarily the real 'frame pointer' (see real_fp) ++ intptr_t* fp() const { return _fp; } + -+ void encode_iso_array_v(Register src, Register dst, -+ Register len, Register result, -+ Register tmp); ++ inline address* sender_pc_addr() const; + -+ void count_positives_v(Register ary, Register len, -+ Register result, Register tmp); ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; + -+ void string_indexof_char_v(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ bool isL); ++ // helper to update a map with callee-saved RBP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + -+ void minmax_FD_v(VectorRegister dst, -+ VectorRegister src1, VectorRegister src2, -+ bool is_double, bool is_min); ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* last_sp); + -+ void reduce_minmax_FD_v(FloatRegister dst, -+ 
FloatRegister src1, VectorRegister src2, -+ VectorRegister tmp1, VectorRegister tmp2, -+ bool is_double, bool is_min); ++ static jint interpreter_frame_expression_stack_direction() { return -1; } + -+#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp ++#endif // CPU_RISCV_FRAME_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp new file mode 100644 -index 00000000000..53a41665f4b +index 0000000000..abd5bda7e4 --- /dev/null -+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -0,0 +1,83 @@ ++++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp +@@ -0,0 +1,245 @@ +/* -+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -13570,368 +12856,234 @@ index 00000000000..53a41665f4b + * + */ + -+#ifndef CPU_RISCV_C2_GLOBALS_RISCV_HPP -+#define CPU_RISCV_C2_GLOBALS_RISCV_HPP ++#ifndef CPU_RISCV_FRAME_RISCV_INLINE_HPP ++#define CPU_RISCV_FRAME_RISCV_INLINE_HPP + -+#include "utilities/globalDefinitions.hpp" -+#include "utilities/macros.hpp" ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" + -+// Sets the default values for platform dependent flags used by the server compiler. -+// (see c2_globals.hpp). Alpha-sorted. ++// Inline functions for RISCV frames: + -+define_pd_global(bool, BackgroundCompilation, true); -+define_pd_global(bool, CICompileOSR, true); -+define_pd_global(bool, InlineIntrinsics, true); -+define_pd_global(bool, PreferInterpreterNativeStubs, false); -+define_pd_global(bool, ProfileTraps, true); -+define_pd_global(bool, UseOnStackReplacement, true); -+define_pd_global(bool, ProfileInterpreter, true); -+define_pd_global(bool, TieredCompilation, COMPILER1_PRESENT(true) NOT_COMPILER1(false)); -+define_pd_global(intx, CompileThreshold, 10000); ++// Constructors: + -+define_pd_global(intx, OnStackReplacePercentage, 140); -+define_pd_global(intx, ConditionalMoveLimit, 0); -+define_pd_global(intx, FreqInlineSize, 325); -+define_pd_global(intx, MinJumpTableSize, 10); -+define_pd_global(intx, InteriorEntryAlignment, 16); -+define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); -+define_pd_global(intx, LoopUnrollLimit, 60); -+define_pd_global(intx, LoopPercentProfileLimit, 10); -+// InitialCodeCacheSize derived from specjbb2000 run. -+define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize -+define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} + -+// Ergonomics related flags -+define_pd_global(uint64_t,MaxRAM, 128ULL*G); -+define_pd_global(intx, RegisterCostAreaRatio, 16000); ++static int spin; + -+// Peephole and CISC spilling both break the graph, and so makes the -+// scheduler sick. 
-+define_pd_global(bool, OptoPeephole, false); -+define_pd_global(bool, UseCISCSpill, false); -+define_pd_global(bool, OptoScheduling, true); -+define_pd_global(bool, OptoBundling, false); -+define_pd_global(bool, OptoRegScheduling, false); -+define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); -+define_pd_global(bool, IdealizeClearArrayNode, true); ++inline void frame::init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { ++ intptr_t a = intptr_t(ptr_sp); ++ intptr_t b = intptr_t(ptr_fp); ++ _sp = ptr_sp; ++ _unextended_sp = ptr_sp; ++ _fp = ptr_fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); + -+define_pd_global(intx, ReservedCodeCacheSize, 48*M); -+define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); -+define_pd_global(intx, ProfiledCodeHeapSize, 22*M); -+define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); -+define_pd_global(uintx, CodeCacheMinBlockLength, 6); -+define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} + -+// Ergonomics related flags -+define_pd_global(bool, NeverActAsServerClassMachine, false); ++inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { ++ init(ptr_sp, ptr_fp, pc); ++} + -+define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. ++inline frame::frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc) { ++ intptr_t a = intptr_t(ptr_sp); ++ intptr_t b = intptr_t(ptr_fp); ++ _sp = ptr_sp; ++ _unextended_sp = unextended_sp; ++ _fp = ptr_fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); + -+#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/c2_init_riscv.cpp b/src/hotspot/cpu/riscv/c2_init_riscv.cpp -new file mode 100644 -index 00000000000..cdbd69807be ---- /dev/null -+++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp -@@ -0,0 +1,38 @@ -+/* -+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ *
-+ */
++ address original_pc = CompiledMethod::get_deopt_original_pc(this);
++ if (original_pc != NULL) {
++ _pc = original_pc;
++ assert(_cb->as_compiled_method()->insts_contains_inclusive(_pc),
++ "original PC must be in the main code section of the compiled method (or must be immediately following it)");
++ _deopt_state = is_deoptimized;
++ } else {
++ _deopt_state = not_deoptimized;
++ }
++}
+
-+#include "precompiled.hpp"
-+#include "opto/compile.hpp"
-+#include "opto/node.hpp"
++inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp) {
++ intptr_t a = intptr_t(ptr_sp);
++ intptr_t b = intptr_t(ptr_fp);
++ _sp = ptr_sp;
++ _unextended_sp = ptr_sp;
++ _fp = ptr_fp;
++ _pc = (address)(ptr_sp[-1]);
+
-+// processor dependent initialization for riscv
++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace
++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly
++ // unlucky the junk value could be to a zombied method and we'll die on the
++ // find_blob call. This is also why we can have no asserts on the validity
++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler
++ // -> pd_last_frame should use a specialized version of pd_last_frame which could
++ // call a specialized frame constructor instead of this one.
++ // Then we could use the assert below. However this assert is of somewhat dubious
++ // value.
+
-+extern void reg_mask_init();
++ _cb = CodeCache::find_blob(_pc);
++ adjust_unextended_sp();
+
-+void Compile::pd_compiler2_init() {
-+ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" );
-+ reg_mask_init();
++ address original_pc = CompiledMethod::get_deopt_original_pc(this);
++ if (original_pc != NULL) {
++ _pc = original_pc;
++ _deopt_state = is_deoptimized;
++ } else {
++ _deopt_state = not_deoptimized;
++ }
+}
-diff --git a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp
-new file mode 100644
-index 00000000000..a90d9fdc160
---- /dev/null
-+++ b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp
-@@ -0,0 +1,47 @@
-+/*
-+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
-+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This code is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License version 2 only, as
-+ * published by the Free Software Foundation.
-+ *
-+ * This code is distributed in the hope that it will be useful, but WITHOUT
-+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * version 2 for more details (a copy is included in the LICENSE file that
-+ * accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License version
-+ * 2 along with this work; if not, write to the Free Software Foundation,
-+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-+ *
-+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-+ * or visit www.oracle.com if you need additional information or have any
-+ * questions.
-+ *
-+ */
-+
-+#include "precompiled.hpp"
-+#include "asm/macroAssembler.hpp"
-+#include "opto/compile.hpp"
-+#include "opto/node.hpp"
-+#include "opto/output.hpp"
-+#include "runtime/sharedRuntime.hpp"
-+
-+#define __ masm.
-+void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { -+ assert(SharedRuntime::polling_page_return_handler_blob() != NULL, -+ "polling page return stub not created yet"); -+ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); -+ RuntimeAddress callback_addr(stub); ++// Accessors + -+ __ bind(entry->_stub_label); -+ InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); -+ masm.code_section()->relocate(masm.pc(), safepoint_pc.rspec()); -+ __ la(t0, safepoint_pc.target()); -+ __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); -+ __ far_jump(callback_addr); ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() && ++ unextended_sp() == other.unextended_sp() && ++ fp() == other.fp() && ++ pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; +} -+#undef __ -diff --git a/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp -new file mode 100644 -index 00000000000..14a68b45026 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_CODEBUFFER_RISCV_HPP -+#define CPU_RISCV_CODEBUFFER_RISCV_HPP ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
++inline intptr_t* frame::id(void) const { return unextended_sp(); } + -+private: -+ void pd_initialize() {} ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } + -+public: -+ void flush_bundle(bool start_new_bundle) {} ++inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } + -+#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -new file mode 100644 -index 00000000000..75bc4be7840 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -0,0 +1,149 @@ -+/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = (intptr_t **)addr_at(link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "code/compiledIC.hpp" -+#include "code/icBuffer.hpp" -+#include "code/nmethod.hpp" -+#include "memory/resourceArea.hpp" -+#include "runtime/mutexLocker.hpp" -+#include "runtime/safepoint.hpp" ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } + -+// ---------------------------------------------------------------------------- ++// Return address ++inline address* frame::sender_pc_addr() const { return (address*) addr_at(return_addr_offset); } ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); } + -+#define __ _masm. -+address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { -+ precond(cbuf.stubs()->start() != badAddress); -+ precond(cbuf.stubs()->end() != badAddress); -+ // Stub is fixed up when the corresponding call is converted from -+ // calling compiled code to calling interpreted code. -+ // mv xmethod, 0 -+ // jalr -4 # to self ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} + -+ if (mark == NULL) { -+ mark = cbuf.insts_mark(); // Get mark within main instrs section. 
-+ } ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} + -+ // Note that the code buffer's insts_mark is always relative to insts. -+ // That's why we must use the macroassembler to generate a stub. -+ MacroAssembler _masm(&cbuf); ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} + -+ address base = __ start_a_stub(to_interp_stub_size()); -+ int offset = __ offset(); -+ if (base == NULL) { -+ return NULL; // CodeBuffer::expand failed -+ } -+ // static stub relocation stores the instruction address of the call -+ __ relocate(static_stub_Relocation::spec(mark)); ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} + -+ __ emit_static_call_stub(); + -+ assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big"); -+ __ end_a_stub(); -+ return base; -+} -+#undef __ ++// Constant pool cache + -+int CompiledStaticCall::to_interp_stub_size() { -+ // fence_i + fence* + (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr -+ return NativeFenceI::instruction_size() + 12 * NativeInstruction::instruction_size; ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); +} + -+int CompiledStaticCall::to_trampoline_stub_size() { -+ // Somewhat pessimistically, we count 4 instructions here (although -+ // there are only 3) because we sometimes emit an alignment nop. -+ // Trampoline stubs are always word aligned. -+ return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; -+} ++// Method + -+// Relocation entries for call stub, compiled java to interpreter. -+int CompiledStaticCall::reloc_to_interp_stub() { -+ return 4; // 3 in emit_to_interp_stub + 1 in emit_call ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); +} + -+void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { -+ address stub = find_stub(); -+ guarantee(stub != NULL, "stub not found"); ++// Mirror + -+ if (TraceICs) { -+ ResourceMark rm; -+ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", -+ p2i(instruction_address()), -+ callee->name_and_sig_as_C_string()); ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL) { ++ return sp(); ++ } else { ++ // sp() may have been extended or shrunk by an adapter. At least ++ // check that we don't fall behind the legal region. ++ // For top deoptimized frame last_sp == interpreter_frame_monitor_end. ++ assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; + } ++} + -+ // Creation also verifies the object. 
-+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); -+#ifdef ASSERT -+ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} + -+ verify_mt_safe(callee, entry, method_holder, jump); -+#endif -+ // Update stub. -+ method_holder->set_data((intptr_t)callee()); -+ NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry); -+ ICache::invalidate_range(stub, to_interp_stub_size()); -+ // Update jump to call. -+ set_destination_mt_safe(stub); ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); +} + -+void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { -+ // Reset stub. -+ address stub = static_stub->addr(); -+ assert(stub != NULL, "stub not found"); -+ assert(CompiledICLocker::is_safe(stub), "mt unsafe call"); -+ // Creation also verifies the object. -+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); -+ method_holder->set_data(0); -+ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); -+ jump->set_jump_destination((address)-1); ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; +} + -+//----------------------------------------------------------------------------- -+// Non-product mode code -+#ifndef PRODUCT + -+void CompiledDirectStaticCall::verify() { -+ // Verify call. -+ _call->verify(); -+ _call->verify_alignment(); ++// Entry frames + -+ // Verify stub. -+ address stub = find_stub(); -+ assert(stub != NULL, "no stub found for static call"); -+ // Creation also verifies the object. -+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); -+ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} + -+ // Verify state. -+ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++ ++// Compiled frames ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ oop* result_adr = (oop *)map->location(x10->as_VMReg()); ++ guarantee(result_adr != NULL, "bad register save location"); ++ return (*result_adr); +} + -+#endif // !PRODUCT -diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ oop* result_adr = (oop *)map->location(x10->as_VMReg()); ++ guarantee(result_adr != NULL, "bad register save location"); ++ *result_adr = obj; ++} ++ ++#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp new file mode 100644 -index 00000000000..bceadcc5dcc +index 0000000000..e191cbcee2 --- /dev/null -+++ b/src/hotspot/cpu/riscv/copy_riscv.hpp -@@ -0,0 +1,136 @@ ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -0,0 +1,481 @@ +/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. 
All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13954,126 +13106,471 @@ index 00000000000..bceadcc5dcc + * + */ + -+#ifndef CPU_RISCV_COPY_RISCV_HPP -+#define CPU_RISCV_COPY_RISCV_HPP -+ -+#include OS_CPU_HEADER(copy) ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/g1/c1/g1BarrierSetC1.hpp" ++#endif + -+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { -+ julong* to = (julong*) tohw; -+ julong v = ((julong) value << 32) | value; -+ while (count-- > 0) { -+ *to++ = v; -+ } -+} ++#define __ masm-> + -+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { -+ pd_fill_to_words(tohw, count, value); -+} ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) { ++ assert_cond(masm != NULL); ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ if (!dest_uninitialized) { ++ Label done; ++ Address in_progress(xthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + -+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { -+ (void)memset(to, value, count); -+} ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lwu(t0, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(t0, in_progress); ++ } ++ __ beqz(t0, done); + -+static void pd_zero_to_words(HeapWord* tohw, size_t count) { -+ pd_fill_to_words(tohw, count, 0); -+} ++ __ push_reg(saved_regs, sp); ++ if (count == c_rarg0) { ++ if (addr == c_rarg1) { ++ // exactly backwards!! 
++ __ mv(t0, c_rarg0); ++ __ mv(c_rarg0, c_rarg1); ++ __ mv(c_rarg1, t0); ++ } else { ++ __ mv(c_rarg1, count); ++ __ mv(c_rarg0, addr); ++ } ++ } else { ++ __ mv(c_rarg0, addr); ++ __ mv(c_rarg1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ pop_reg(saved_regs, sp); + -+static void pd_zero_to_bytes(void* to, size_t count) { -+ (void)memset(to, 0, count); ++ __ bind(done); ++ } +} + -+static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ (void)memmove(to, from, count * HeapWordSize); ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs) { ++ assert_cond(masm != NULL); ++ __ push_reg(saved_regs, sp); ++ assert_different_registers(start, count, tmp); ++ assert_different_registers(c_rarg0, count); ++ __ mv(c_rarg0, start); ++ __ mv(c_rarg1, count); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ pop_reg(saved_regs, sp); +} + -+static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ switch (count) { -+ case 8: to[7] = from[7]; // fall through -+ case 7: to[6] = from[6]; // fall through -+ case 6: to[5] = from[5]; // fall through -+ case 5: to[4] = from[4]; // fall through -+ case 4: to[3] = from[3]; // fall through -+ case 3: to[2] = from[2]; // fall through -+ case 2: to[1] = from[1]; // fall through -+ case 1: to[0] = from[0]; // fall through -+ case 0: break; -+ default: -+ memcpy(to, from, count * HeapWordSize); -+ break; ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert_cond(masm != NULL); ++ assert(thread == xthread, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert_different_registers(obj, pre_val, tmp, t0); ++ assert(pre_val != noreg && tmp != noreg, "expecting a register"); ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width ++ __ lwu(tmp, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(tmp, in_progress); + } -+} ++ __ beqz(tmp, done); + -+static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { -+ shared_disjoint_words_atomic(from, to, count); -+} ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); ++ } + -+static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_conjoint_words(from, to, count); -+} ++ // Is the previous value null? 
++ __ beqz(pre_val, done); + -+static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words(from, to, count); -+} ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) + -+static void pd_conjoint_bytes(const void* from, void* to, size_t count) { -+ (void)memmove(to, from, count); -+} ++ __ ld(tmp, index); // tmp := *index_adr ++ __ beqz(tmp, runtime); // tmp == 0? ++ // If yes, goto runtime + -+static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { -+ pd_conjoint_bytes(from, to, count); -+} ++ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize ++ __ sd(tmp, index); // *index_adr := tmp ++ __ ld(t0, buffer); ++ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr + -+static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { -+ _Copy_conjoint_jshorts_atomic(from, to, count); -+} ++ // Record the previous value ++ __ sd(pre_val, Address(tmp, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ ++ __ push_call_clobbered_registers(); ++ if (expand_call) { ++ assert(pre_val != c_rarg1, "smashed arg"); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ __ pop_call_clobbered_registers(); ++ ++ __ bind(done); + -+static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { -+ _Copy_conjoint_jints_atomic(from, to, count); +} + -+static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { -+ _Copy_conjoint_jlongs_atomic(from, to, count); ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert_cond(masm != NULL); ++ assert(thread == xthread, "must be"); ++ assert_different_registers(store_addr, new_val, thread, tmp, tmp2, ++ t0); ++ assert(store_addr != noreg && new_val != noreg && tmp != noreg && ++ tmp2 != noreg, "expecting a register"); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ ++ __ xorr(tmp, store_addr, new_val); ++ __ srli(tmp, tmp, HeapRegion::LogOfHRGrainBytes); ++ __ beqz(tmp, done); ++ ++ // crosses regions, storing NULL? ++ ++ __ beqz(new_val, done); ++ ++ // storing region crossing non-NULL, is card already dirty? 
++ ++ ExternalAddress cardtable((address) ct->byte_map_base()); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ const Register card_addr = tmp; ++ ++ __ srli(card_addr, store_addr, CardTable::card_shift); ++ ++ // get the address of the card ++ __ load_byte_map_base(tmp2); ++ __ add(card_addr, card_addr, tmp2); ++ __ lbu(tmp2, Address(card_addr)); ++ __ mv(t0, (int)G1CardTable::g1_young_card_val()); ++ __ beq(tmp2, t0, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(MacroAssembler::StoreLoad); ++ ++ __ lbu(tmp2, Address(card_addr)); ++ __ beqz(tmp2, done); ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. ++ ++ __ sb(zr, Address(card_addr)); ++ ++ __ ld(t0, queue_index); ++ __ beqz(t0, runtime); ++ __ sub(t0, t0, wordSize); ++ __ sd(t0, queue_index); ++ ++ __ ld(tmp2, buffer); ++ __ add(t0, tmp2, t0); ++ __ sd(card_addr, Address(t0, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ RegSet saved = RegSet::of(store_addr, new_val); ++ __ push_reg(saved, sp); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ __ pop_reg(saved, sp); ++ ++ __ bind(done); +} + -+static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { -+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); -+ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ assert_cond(masm != NULL); ++ bool on_oop = is_reference_type(type); ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ // RA is live. It must be saved around calls. ++ __ enter(); // barrier may call runtime ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ leave(); ++ } +} + -+static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_bytes(from, to, count); ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ assert_cond(masm != NULL); ++ // flatten object address if needed ++ if (dst.offset() == 0) { ++ if (dst.base() != x13) { ++ __ mv(x13, dst.base()); ++ } ++ } else { ++ __ la(x13, dst); ++ } ++ ++ g1_write_barrier_pre(masm, ++ x13 /* obj */, ++ tmp2 /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); ++ } else { ++ // G1 barrier needs uncompressed oop for region cross check. 
++ Register new_val = val; ++ if (UseCompressedOops) { ++ new_val = t1; ++ __ mv(new_val, val); ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); ++ g1_write_barrier_post(masm, ++ x13 /* store_adr */, ++ new_val /* new_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); ++ } +} + -+static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jshorts(from, to, count); ++#ifdef COMPILER1 ++ ++#undef __ ++#define __ ce->masm()-> ++ ++void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. ++ __ bind(*stub->entry()); ++ ++ assert(stub->pre_val()->is_register(), "Precondition."); ++ ++ Register pre_val_reg = stub->pre_val()->as_register(); ++ ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); ++ } ++ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); ++ __ j(*stub->continuation()); +} + -+static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jints(from, to, count); ++void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ __ bind(*stub->entry()); ++ assert(stub->addr()->is_register(), "Precondition"); ++ assert(stub->new_val()->is_register(), "Precondition"); ++ Register new_val_reg = stub->new_val()->as_register(); ++ __ beqz(new_val_reg, *stub->continuation(), /* is_far */ true); ++ ce->store_parameter(stub->addr()->as_pointer_register(), 0); ++ __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin())); ++ __ j(*stub->continuation()); +} + -+static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jlongs(from, to, count); ++#undef __ ++ ++#define __ sasm-> ++ ++void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_pre_barrier", false); ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ // arg0 : previous value of memory ++ const Register pre_val = x10; ++ const Register thread = xthread; ++ const Register tmp = t0; ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ Label done; ++ Label runtime; ++ ++ // Is marking still active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width ++ __ lwu(tmp, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(tmp, in_progress); ++ } ++ __ beqz(tmp, done); ++ ++ // Can we store original value in the thread's buffer? 
++ __ ld(tmp, queue_index); ++ __ beqz(tmp, runtime); ++ ++ __ sub(tmp, tmp, wordSize); ++ __ sd(tmp, queue_index); ++ __ ld(t1, buffer); ++ __ add(tmp, tmp, t1); ++ __ load_parameter(0, t1); ++ __ sd(t1, Address(tmp, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ ++ __ epilogue(); +} + -+static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { -+ assert(!UseCompressedOops, "foo!"); -+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); -+ _Copy_arrayof_conjoint_jlongs(from, to, count); ++void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_post_barrier", false); ++ ++ // arg0 : store_address ++ Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // At this point we know new_value is non-NULL and the new_value crosses regions. ++ // Must check to see if card is already dirty ++ const Register thread = xthread; ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ const Register card_offset = t1; ++ // RA is free here, so we can use it to hold the byte_map_base. ++ const Register byte_map_base = ra; ++ ++ assert_different_registers(card_offset, byte_map_base, t0); ++ ++ __ load_parameter(0, card_offset); ++ __ srli(card_offset, card_offset, CardTable::card_shift); ++ __ load_byte_map_base(byte_map_base); ++ ++ // Convert card offset into an address in card_addr ++ Register card_addr = card_offset; ++ __ add(card_addr, byte_map_base, card_addr); ++ ++ __ lbu(t0, Address(card_addr, 0)); ++ __ sub(t0, t0, (int)G1CardTable::g1_young_card_val()); ++ __ beqz(t0, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(MacroAssembler::StoreLoad); ++ __ lbu(t0, Address(card_addr, 0)); ++ __ beqz(t0, done); ++ ++ // storing region crossing non-NULL, card is clean. ++ // dirty card and log. 
++ __ sb(zr, Address(card_addr, 0)); ++ ++ __ ld(t0, queue_index); ++ __ beqz(t0, runtime); ++ __ sub(t0, t0, wordSize); ++ __ sd(t0, queue_index); ++ ++ // Reuse RA to hold buffer_addr ++ const Register buffer_addr = ra; ++ ++ __ ld(buffer_addr, buffer); ++ __ add(t0, buffer_addr, t0); ++ __ sd(card_addr, Address(t0, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ __ epilogue(); +} + -+#endif // CPU_RISCV_COPY_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp ++#undef __ ++ ++#endif // COMPILER1 +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp new file mode 100644 -index 00000000000..b0e5560c906 +index 0000000000..37bc183f39 --- /dev/null -+++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp -@@ -0,0 +1,58 @@ ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp +@@ -0,0 +1,78 @@ +/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -14096,47 +13593,68 @@ index 00000000000..b0e5560c906 + * + */ + -+#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP -+#define CPU_RISCV_DISASSEMBLER_RISCV_HPP ++#ifndef CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP + -+static int pd_instruction_alignment() { -+ return 1; -+} ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++#include "utilities/macros.hpp" + -+static const char* pd_cpu_opts() { -+ return ""; -+} ++#ifdef COMPILER1 ++class LIR_Assembler; ++#endif ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; + -+// Returns address of n-th instruction preceding addr, -+// NULL if no preceding instruction can be found. -+// On riscv, we assume a constant instruction length. -+// It might be beneficial to check "is_readable" as we do on ppc and s390. -+static address find_prev_instr(address addr, int n_instr) { -+ return addr - Assembler::instruction_size * n_instr; -+} ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs); ++ void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs); + -+// special-case instruction decoding. -+// There may be cases where the binutils disassembler doesn't do -+// the perfect job. In those cases, decode_instruction0 may kick in -+// and do it right. 
-+// If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" -+static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { -+ return here; -+} ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); + -+// platform-specific instruction annotations (like value of loaded constants) -+static void annotate(address pc, outputStream* st) {} ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); + -+#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp ++public: ++#ifdef COMPILER1 ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++#endif ++ ++ void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp new file mode 100644 -index 00000000000..5c700be9c91 +index 0000000000..8735fd014f --- /dev/null -+++ b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp -@@ -0,0 +1,44 @@ ++++ b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp +@@ -0,0 +1,31 @@ +/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -14159,73 +13677,21 @@ index 00000000000..5c700be9c91 + * + */ + -+#include "precompiled.hpp" -+#include "prims/foreign_globals.hpp" -+#include "utilities/debug.hpp" -+ -+// Stubbed out, implement later -+const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { -+ Unimplemented(); -+ return {}; -+} ++#ifndef CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP ++#define CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP + -+const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { -+ Unimplemented(); -+ return {}; -+} ++const size_t G1MergeHeapRootsPrefetchCacheSize = 16; + -+const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { -+ ShouldNotCallThis(); -+ return {}; -+} -diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp ++#endif // CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp new file mode 100644 -index 00000000000..3ac89752c27 +index 0000000000..2b556b95d7 --- /dev/null -+++ b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp -@@ -0,0 +1,32 @@ ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +@@ -0,0 +1,231 @@ +/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP -+#define CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP -+ -+class ABIDescriptor {}; -+class BufferLayout {}; -+ -+#endif // CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -new file mode 100644 -index 00000000000..6e38960598a ---- /dev/null -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -0,0 +1,697 @@ -+/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -14249,1147 +13715,1447 @@ index 00000000000..6e38960598a + */ + +#include "precompiled.hpp" -+#include "compiler/oopMap.hpp" -+#include "interpreter/interpreter.hpp" -+#include "memory/resourceArea.hpp" ++#include "classfile/classLoaderData.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" +#include "memory/universe.hpp" -+#include "oops/markWord.hpp" -+#include "oops/method.hpp" -+#include "oops/oop.inline.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/handles.inline.hpp" -+#include "runtime/javaCalls.hpp" -+#include "runtime/monitorChunk.hpp" -+#include "runtime/os.inline.hpp" -+#include "runtime/signature.hpp" -+#include "runtime/stackWatermarkSet.hpp" -+#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" -+#include "vmreg_riscv.inline.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_Runtime1.hpp" -+#include "runtime/vframeArray.hpp" -+#endif ++#include "runtime/thread.hpp" + -+#ifdef ASSERT -+void RegisterMap::check_location_valid() { -+} -+#endif ++#define __ masm-> + ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ assert_cond(masm != NULL); + -+// Profiling/safepoint support ++ // RA is live. It must be saved around calls. + -+bool frame::safe_for_sender(JavaThread *thread) { -+ address addr_sp = (address)_sp; -+ address addr_fp = (address)_fp; -+ address unextended_sp = (address)_unextended_sp; ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ switch (type) { ++ case T_OBJECT: // fall through ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ lwu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else { ++ __ ld(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ load_unsigned_byte (dst, src); break; ++ case T_BYTE: __ load_signed_byte (dst, src); break; ++ case T_CHAR: __ load_unsigned_short(dst, src); break; ++ case T_SHORT: __ load_signed_short (dst, src); break; ++ case T_INT: __ lw (dst, src); break; ++ case T_LONG: __ ld (dst, src); break; ++ case T_ADDRESS: __ ld (dst, src); break; ++ case T_FLOAT: __ flw (f10, src); break; ++ case T_DOUBLE: __ fld (f10, src); break; ++ default: Unimplemented(); ++ } ++} + -+ // consider stack guards when trying to determine "safe" stack pointers -+ // sp must be within the usable part of the stack (not in guards) -+ if (!thread->is_in_usable_stack(addr_sp)) { -+ return false; ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ assert_cond(masm != NULL); ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ switch (type) { ++ case T_OBJECT: // fall through ++ case T_ARRAY: { ++ val = val == noreg ? 
zr : val; ++ if (in_heap) { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (val != zr) { ++ __ encode_heap_oop(val); ++ } ++ __ sw(val, dst); ++ } else { ++ __ sd(val, dst); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ sd(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ sb(val, dst); ++ break; ++ case T_BYTE: __ sb(val, dst); break; ++ case T_CHAR: __ sh(val, dst); break; ++ case T_SHORT: __ sh(val, dst); break; ++ case T_INT: __ sw(val, dst); break; ++ case T_LONG: __ sd(val, dst); break; ++ case T_ADDRESS: __ sd(val, dst); break; ++ case T_FLOAT: __ fsw(f10, dst); break; ++ case T_DOUBLE: __ fsd(f10, dst); break; ++ default: Unimplemented(); + } + -+ // When we are running interpreted code the machine stack pointer, SP, is -+ // set low enough so that the Java expression stack can grow and shrink -+ // without ever exceeding the machine stack bounds. So, ESP >= SP. ++} + -+ // When we call out of an interpreted method, SP is incremented so that -+ // the space between SP and ESP is removed. The SP saved in the callee's -+ // frame is the SP *before* this increment. So, when we walk a stack of -+ // interpreter frames the sender's SP saved in a frame might be less than -+ // the SP at the point of call. ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ assert_cond(masm != NULL); ++ // If mask changes we need to ensure that the inverse is still encodable as an immediate ++ STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); ++ __ andi(obj, obj, ~JNIHandles::weak_tag_mask); ++ __ ld(obj, Address(obj, 0)); // *obj ++} + -+ // So unextended sp must be within the stack but we need not to check -+ // that unextended sp >= sp ++// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. ++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Register tmp2, ++ Label& slow_case, ++ bool is_far) { ++ assert_cond(masm != NULL); ++ assert_different_registers(obj, tmp2); ++ assert_different_registers(obj, var_size_in_bytes); ++ Register end = tmp2; + -+ if (!thread->is_in_full_stack_checked(unextended_sp)) { -+ return false; ++ __ ld(obj, Address(xthread, JavaThread::tlab_top_offset())); ++ if (var_size_in_bytes == noreg) { ++ __ la(end, Address(obj, con_size_in_bytes)); ++ } else { ++ __ add(end, obj, var_size_in_bytes); + } ++ __ ld(t0, Address(xthread, JavaThread::tlab_end_offset())); ++ __ bgtu(end, t0, slow_case, is_far); + -+ // an fp must be within the stack and above (but not equal) sp -+ // second evaluation on fp+ is added to handle situation where fp is -1 -+ bool fp_safe = thread->is_in_stack_range_excl(addr_fp, addr_sp) && -+ thread->is_in_full_stack_checked(addr_fp + (return_addr_offset * sizeof(void*))); -+ -+ // We know sp/unextended_sp are safe only fp is questionable here -+ -+ // If the current frame is known to the code cache then we can attempt to -+ // to construct the sender and do some validation of it. 
This goes a long way -+ // toward eliminating issues when we get in frame construction code ++ // update the tlab top pointer ++ __ sd(end, Address(xthread, JavaThread::tlab_top_offset())); + -+ if (_cb != NULL) { ++ // recover var_size_in_bytes if necessary ++ if (var_size_in_bytes == end) { ++ __ sub(var_size_in_bytes, var_size_in_bytes, obj); ++ } ++} + -+ // First check if frame is complete and tester is reliable -+ // Unfortunately we can only check frame complete for runtime stubs and nmethod -+ // other generic buffer blobs are more problematic so we just assume they are -+ // ok. adapter blobs never have a frame complete and are never ok. ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Label& slow_case, ++ bool is_far) { ++ assert_cond(masm != NULL); ++ assert_different_registers(obj, var_size_in_bytes, tmp1); ++ if (!Universe::heap()->supports_inline_contig_alloc()) { ++ __ j(slow_case); ++ } else { ++ Register end = tmp1; ++ Label retry; ++ __ bind(retry); + -+ if (!_cb->is_frame_complete_at(_pc)) { -+ if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { -+ return false; -+ } ++ // Get the current end of the heap ++ ExternalAddress address_end((address) Universe::heap()->end_addr()); ++ { ++ int32_t offset; ++ __ la_patchable(t1, address_end, offset); ++ __ ld(t1, Address(t1, offset)); + } + -+ // Could just be some random pointer within the codeBlob -+ if (!_cb->code_contains(_pc)) { -+ return false; ++ // Get the current top of the heap ++ ExternalAddress address_top((address) Universe::heap()->top_addr()); ++ { ++ int32_t offset; ++ __ la_patchable(t0, address_top, offset); ++ __ addi(t0, t0, offset); ++ __ lr_d(obj, t0, Assembler::aqrl); + } + -+ // Entry frame checks -+ if (is_entry_frame()) { -+ // an entry frame must have a valid fp. -+ return fp_safe && is_entry_frame_valid(thread); ++ // Adjust it my the size of our new object ++ if (var_size_in_bytes == noreg) { ++ __ la(end, Address(obj, con_size_in_bytes)); ++ } else { ++ __ add(end, obj, var_size_in_bytes); + } + -+ intptr_t* sender_sp = NULL; -+ intptr_t* sender_unextended_sp = NULL; -+ address sender_pc = NULL; -+ intptr_t* saved_fp = NULL; -+ -+ if (is_interpreted_frame()) { -+ // fp must be safe -+ if (!fp_safe) { -+ return false; -+ } -+ -+ sender_pc = (address)this->fp()[return_addr_offset]; -+ // for interpreted frames, the value below is the sender "raw" sp, -+ // which can be different from the sender unextended sp (the sp seen -+ // by the sender) because of current frame local variables -+ sender_sp = (intptr_t*) addr_at(sender_sp_offset); -+ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; -+ saved_fp = (intptr_t*) this->fp()[link_offset]; -+ } else { -+ // must be some sort of compiled/runtime frame -+ // fp does not have to be safe (although it could be check for c1?) ++ // if end < obj then we wrapped around high memory ++ __ bltu(end, obj, slow_case, is_far); + -+ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc -+ if (_cb->frame_size() <= 0) { -+ return false; -+ } ++ __ bgtu(end, t1, slow_case, is_far); + -+ sender_sp = _unextended_sp + _cb->frame_size(); -+ // Is sender_sp safe? -+ if (!thread->is_in_full_stack_checked((address)sender_sp)) { -+ return false; -+ } ++ // If heap_top hasn't been changed by some other thread, update it. 
++ __ sc_d(t1, end, t0, Assembler::rl); ++ __ bnez(t1, retry); + -+ sender_unextended_sp = sender_sp; -+ sender_pc = (address) *(sender_sp - 1); -+ saved_fp = (intptr_t*) *(sender_sp - 2); -+ } ++ incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1); ++ } ++} + ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1) { ++ assert_cond(masm != NULL); ++ assert(tmp1->is_valid(), "need temp reg"); + -+ // If the potential sender is the interpreter then we can do some more checking -+ if (Interpreter::contains(sender_pc)) { ++ __ ld(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); ++ if (var_size_in_bytes->is_valid()) { ++ __ add(tmp1, tmp1, var_size_in_bytes); ++ } else { ++ __ add(tmp1, tmp1, con_size_in_bytes); ++ } ++ __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); ++} +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +new file mode 100644 +index 0000000000..984d94f4c3 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +@@ -0,0 +1,76 @@ ++/* ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // fp is always saved in a recognizable place in any code we generate. However -+ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp -+ // is really a frame pointer. 
-+ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { -+ return false; -+ } ++#ifndef CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP + -+ // construct the potential sender -+ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "memory/allocation.hpp" ++#include "oops/access.hpp" + -+ return sender.is_interpreted_frame_valid(thread); -+ } ++class BarrierSetAssembler: public CHeapObj { ++private: ++ void incr_allocated_bytes(MacroAssembler* masm, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); + -+ // We must always be able to find a recognizable pc -+ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); -+ if (sender_pc == NULL || sender_blob == NULL) { -+ return false; -+ } ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) {} ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register start, Register end, Register tmp, RegSet saved_regs) {} ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); + -+ // Could be a zombie method -+ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { -+ return false; -+ } ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); + -+ // Could just be some random pointer within the codeBlob -+ if (!sender_blob->code_contains(sender_pc)) { -+ return false; -+ } ++ virtual void tlab_allocate(MacroAssembler* masm, ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp1, // temp register ++ Register tmp2, // temp register ++ Label& slow_case, // continuation point if fast allocation fails ++ bool is_far = false ++ ); + -+ // We should never be able to see an adapter if the current frame is something from code cache -+ if (sender_blob->is_adapter_blob()) { -+ return false; -+ } ++ void eden_allocate(MacroAssembler* masm, ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp1, // temp register ++ Label& slow_case, // continuation point if fast allocation fails ++ bool is_far = false ++ ); ++ virtual void barrier_stubs_init() {} + -+ // Could be the call_stub -+ if (StubRoutines::returns_to_call_stub(sender_pc)) { -+ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { -+ return false; -+ } ++ virtual ~BarrierSetAssembler() {} ++}; + -+ // construct the potential sender -+ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp 
b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +new file mode 100644 +index 0000000000..81d47d61d4 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // Validate the JavaCallWrapper an entry frame must have -+ address jcw = (address)sender.entry_frame_call_wrapper(); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "interpreter/interp_masm.hpp" + -+ bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); ++#define __ masm-> + -+ return jcw_safe; -+ } + -+ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); -+ if (nm != NULL) { -+ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || -+ nm->method()->is_method_handle_intrinsic()) { -+ return false; -+ } -+ } ++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) { ++ assert_cond(masm != NULL); ++ assert_different_registers(obj, tmp); ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); + -+ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size -+ // because the return address counts against the callee's frame. -+ if (sender_blob->frame_size() <= 0) { -+ assert(!sender_blob->is_compiled(), "should count return address at least"); -+ return false; -+ } ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + -+ // We should never be able to see anything here except an nmethod. If something in the -+ // code cache (current frame) is called by an entity within the code cache that entity -+ // should not be anything but the call stub (already covered), the interpreter (already covered) -+ // or an nmethod. 
-+ if (!sender_blob->is_compiled()) { -+ return false; -+ } ++ __ srli(obj, obj, CardTable::card_shift); + -+ // Could put some more validation for the potential non-interpreted sender -+ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... ++ assert(CardTable::dirty_card_val() == 0, "must be"); + -+ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ __ load_byte_map_base(tmp); ++ __ add(tmp, obj, tmp); + -+ // We've validated the potential sender that would be created -+ return true; ++ if (UseCondCardMark) { ++ Label L_already_dirty; ++ __ membar(MacroAssembler::StoreLoad); ++ __ lbu(t1, Address(tmp)); ++ __ beqz(t1, L_already_dirty); ++ __ sb(zr, Address(tmp)); ++ __ bind(L_already_dirty); ++ } else { ++ if (ct->scanned_concurrently()) { ++ __ membar(MacroAssembler::StoreStore); ++ } ++ __ sb(zr, Address(tmp)); + } ++} + -+ // Must be native-compiled frame. Since sender will try and use fp to find -+ // linkages it must be safe -+ if (!fp_safe) { -+ return false; -+ } ++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs) { ++ assert_cond(masm != NULL); ++ assert_different_registers(start, tmp); ++ assert_different_registers(count, tmp); + -+ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) -+ if ((address)this->fp()[return_addr_offset] == NULL) { return false; } ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); + -+ return true; -+} ++ Label L_loop, L_done; ++ const Register end = count; + -+void frame::patch_pc(Thread* thread, address pc) { -+ assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); -+ address* pc_addr = &(((address*) sp())[-1]); -+ if (TracePcPatching) { -+ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", -+ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); -+ } -+ // Either the return address is the original one or we are going to -+ // patch in the same address that's already there. 
-+ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); -+ *pc_addr = pc; -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ assert(original_pc == _pc, "expected original PC to be stored before patching"); -+ _deopt_state = is_deoptimized; -+ // leave _pc as is -+ } else { -+ _deopt_state = not_deoptimized; -+ _pc = pc; ++ __ beqz(count, L_done); // zero count - nothing to do ++ // end = start + count << LogBytesPerHeapOop ++ __ shadd(end, count, start, count, LogBytesPerHeapOop); ++ __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive ++ ++ __ srli(start, start, CardTable::card_shift); ++ __ srli(end, end, CardTable::card_shift); ++ __ sub(count, end, start); // number of bytes to copy ++ ++ __ load_byte_map_base(tmp); ++ __ add(start, start, tmp); ++ if (ct->scanned_concurrently()) { ++ __ membar(MacroAssembler::StoreStore); + } -+} + -+bool frame::is_interpreted_frame() const { -+ return Interpreter::contains(pc()); ++ __ bind(L_loop); ++ __ add(tmp, start, count); ++ __ sb(zr, Address(tmp)); ++ __ sub(count, count, 1); ++ __ bgez(count, L_loop); ++ __ bind(L_done); +} + -+int frame::frame_size(RegisterMap* map) const { -+ frame sender = this->sender(map); -+ return sender.sp() - sp(); -+} ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; + -+intptr_t* frame::entry_frame_argument_at(int offset) const { -+ // convert offset to index to deal with tsi -+ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); -+ // Entry frame's arguments are always in relation to unextended_sp() -+ return &unextended_sp()[index]; ++ bool needs_post_barrier = val != noreg && in_heap; ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || dst.offset() == 0) { ++ store_check(masm, dst.base(), x13); ++ } else { ++ assert_cond(masm != NULL); ++ __ la(x13, dst); ++ store_check(masm, x13, t0); ++ } ++ } +} +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp +new file mode 100644 +index 0000000000..686fe8fa47 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+// sender_sp -+intptr_t* frame::interpreter_frame_sender_sp() const { -+ assert(is_interpreted_frame(), "interpreted frame expected"); -+ return (intptr_t*) at(interpreter_frame_sender_sp_offset); -+} ++#ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP + -+void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { -+ assert(is_interpreted_frame(), "interpreted frame expected"); -+ ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); -+} ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" + ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Register tmp); + -+// monitor elements ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs); ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; + -+BasicObjectLock* frame::interpreter_frame_monitor_begin() const { -+ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); -+} ++#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp +new file mode 100644 +index 0000000000..7aa2015f9e +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+BasicObjectLock* frame::interpreter_frame_monitor_end() const { -+ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); -+ // make sure the pointer points inside the frame -+ assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer"); -+ assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer"); -+ return result; -+} ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" + -+void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { -+ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; -+} ++#define __ masm-> + -+// Used by template based interpreter deoptimization -+void frame::interpreter_frame_set_last_sp(intptr_t* last_sp) { -+ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = last_sp; -+} ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) { + -+frame frame::sender_for_entry_frame(RegisterMap* map) const { -+ assert(map != NULL, "map must be set"); -+ // Java frame called from C; skip all C frames and return top C -+ // frame of that chunk as the sender -+ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); -+ assert(!entry_frame_is_first(), "next Java fp must be non zero"); -+ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); -+ // Since we are walking the stack now this nested anchor is obviously walkable -+ // even if it wasn't when it was stacked. -+ if (!jfa->walkable()) { -+ // Capture _last_Java_pc (if needed) and mark anchor walkable. -+ jfa->capture_last_Java_pc(); ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); + } -+ map->clear(); -+ assert(map->include_argument_oops(), "should be set by clear"); -+ vmassert(jfa->last_Java_pc() != NULL, "not walkable"); -+ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); -+ return fr; +} + -+OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { -+ ShouldNotCallThis(); -+ return nullptr; ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register start, Register count, Register tmp, ++ RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp, saved_regs); ++ } +} + -+bool frame::optimized_entry_frame_is_first() const { -+ ShouldNotCallThis(); -+ return false; ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (is_reference_type(type)) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } +} +diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp +new file mode 100644 +index 0000000000..00419c3163 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { -+ ShouldNotCallThis(); -+ return {}; -+} ++#ifndef CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP + -+//------------------------------------------------------------------------------ -+// frame::verify_deopt_original_pc -+// -+// Verifies the calculated original PC of a deoptimization PC for the -+// given unextended SP. -+#ifdef ASSERT -+void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { -+ frame fr; ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" + -+ // This is ugly but it's better than to change {get,set}_original_pc -+ // to take an SP value as argument. And it's only a debugging -+ // method anyway. -+ fr._unextended_sp = unextended_sp; ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. + -+ assert_cond(nm != NULL); -+ address original_pc = nm->get_original_pc(&fr); -+ assert(nm->insts_contains_inclusive(original_pc), -+ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); -+} -+#endif ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs) {} + -+//------------------------------------------------------------------------------ -+// frame::adjust_unextended_sp -+void frame::adjust_unextended_sp() { -+ // On riscv, sites calling method handle intrinsics and lambda forms are treated -+ // as any other call site. Therefore, no special action is needed when we are -+ // returning to any of these call sites. ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; + -+ if (_cb != NULL) { -+ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); -+ if (sender_cm != NULL) { -+ // If the sender PC is a deoptimization point, get the original PC. 
-+ if (sender_cm->is_deopt_entry(_pc) || -+ sender_cm->is_deopt_mh_entry(_pc)) { -+ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); -+ } -+ } -+ } -+} ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register start, Register count, Register tmp, RegSet saved_regs); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; + -+//------------------------------------------------------------------------------ -+// frame::update_map_with_saved_link -+void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { -+ // The interpreter and compiler(s) always save fp in a known -+ // location on entry. We must record where that location is -+ // so that if fp was live on callout from c2 we can find -+ // the saved copy no matter what it called. ++#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +new file mode 100644 +index 0000000000..d19f5b859c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +@@ -0,0 +1,117 @@ ++/* ++ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // Since the interpreter always saves fp if we record where it is then -+ // we don't have to always save fp on entry and exit to c2 compiled -+ // code, on entry will be enough. -+ assert(map != NULL, "map must be set"); -+ map->set_location(::fp->as_VMReg(), (address) link_addr); -+ // this is weird "H" ought to be at a higher address however the -+ // oopMaps seems to have the "H" regs at the same address and the -+ // vanilla register. 
-+ map->set_location(::fp->as_VMReg()->next(), (address) link_addr); -+} ++#include "precompiled.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" + ++#define __ masm->masm()-> + -+//------------------------------------------------------------------------------ -+// frame::sender_for_interpreter_frame -+frame frame::sender_for_interpreter_frame(RegisterMap* map) const { -+ // SP is the raw SP from the sender after adapter or interpreter -+ // extension. -+ intptr_t* sender_sp = this->sender_sp(); ++void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { ++ Register addr = _addr->as_register_lo(); ++ Register newval = _new_value->as_register(); ++ Register cmpval = _cmp_value->as_register(); ++ Register tmp1 = _tmp1->as_register(); ++ Register tmp2 = _tmp2->as_register(); ++ Register result = result_opr()->as_register(); + -+ // This is the sp before any possible extension (adapter/locals). -+ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), newval, t1); + -+#ifdef COMPILER2 -+ assert(map != NULL, "map must be set"); -+ if (map->update_map()) { -+ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ if (UseCompressedOops) { ++ __ encode_heap_oop(tmp1, cmpval); ++ cmpval = tmp1; ++ __ encode_heap_oop(tmp2, newval); ++ newval = tmp2; + } -+#endif // COMPILER2 + -+ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq, ++ /* release */ Assembler::rl, /* is_cae */ false, result); +} + ++#undef __ + -+//------------------------------------------------------------------------------ -+// frame::sender_for_compiled_frame -+frame frame::sender_for_compiled_frame(RegisterMap* map) const { -+ // we cannot rely upon the last fp having been saved to the thread -+ // in C2 code but it will have been pushed onto the stack. 
so we -+ // have to find it relative to the unextended sp -+ -+ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); -+ intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size(); -+ intptr_t* unextended_sp = l_sender_sp; ++#ifdef ASSERT ++#define __ gen->lir(__FILE__, __LINE__)-> ++#else ++#define __ gen->lir()-> ++#endif + -+ // the return_address is always the word on the stack -+ address sender_pc = (address) *(l_sender_sp + frame::return_addr_offset); ++LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) { ++ BasicType bt = access.type(); ++ if (access.is_oop()) { ++ LIRGenerator *gen = access.gen(); ++ if (ShenandoahSATBBarrier) { ++ pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(), ++ LIR_OprFact::illegalOpr /* pre_val */); ++ } ++ if (ShenandoahCASBarrier) { ++ cmp_value.load_item(); ++ new_value.load_item(); + -+ intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp + frame::link_offset); ++ LIR_Opr tmp1 = gen->new_register(T_OBJECT); ++ LIR_Opr tmp2 = gen->new_register(T_OBJECT); ++ LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base(); ++ LIR_Opr result = gen->new_register(T_INT); + -+ assert(map != NULL, "map must be set"); -+ if (map->update_map()) { -+ // Tell GC to use argument oopmaps for some runtime stubs that need it. -+ // For C1, the runtime stub might not have oop maps, so set this flag -+ // outside of update_register_map. -+ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); -+ if (_cb->oop_maps() != NULL) { -+ OopMapSet::update_register_map(this, map); ++ __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), tmp1, tmp2, result)); ++ return result; + } -+ -+ // Since the prolog does the save and restore of FP there is no -+ // oopmap for it so we must fill in its location as if there was -+ // an oopmap entry since if our caller was compiled code there -+ // could be live jvm state in it. -+ update_map_with_saved_link(map, saved_fp_addr); + } -+ -+ return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++ return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value); +} + -+//------------------------------------------------------------------------------ -+// frame::sender_raw -+frame frame::sender_raw(RegisterMap* map) const { -+ // Default is we done have to follow them. The sender_for_xxx will -+ // update it accordingly -+ assert(map != NULL, "map must be set"); -+ map->set_include_argument_oops(false); ++LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) { ++ LIRGenerator* gen = access.gen(); ++ BasicType type = access.type(); + -+ if (is_entry_frame()) { -+ return sender_for_entry_frame(map); -+ } -+ if (is_interpreted_frame()) { -+ return sender_for_interpreter_frame(map); -+ } -+ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ LIR_Opr result = gen->new_register(type); ++ value.load_item(); ++ LIR_Opr value_opr = value.result(); + -+ // This test looks odd: why is it not is_compiled_frame() ? That's -+ // because stubs also have OOP maps. -+ if (_cb != NULL) { -+ return sender_for_compiled_frame(map); ++ if (access.is_oop()) { ++ value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators()); + } + -+ // Must be native-compiled frame, i.e. the marshaling code for native -+ // methods that exists in the core system. 
-+ return frame(sender_sp(), link(), sender_pc()); -+} -+ -+frame frame::sender(RegisterMap* map) const { -+ frame result = sender_raw(map); ++ assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type"); ++ LIR_Opr tmp = gen->new_register(T_INT); ++ __ xchg(access.resolved_addr(), value_opr, result, tmp); + -+ if (map->process_frames()) { -+ StackWatermarkSet::on_iteration(map->thread(), result); ++ if (access.is_oop()) { ++ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0)); ++ LIR_Opr tmp_opr = gen->new_register(type); ++ __ move(result, tmp_opr); ++ result = tmp_opr; ++ if (ShenandoahSATBBarrier) { ++ pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr, ++ result /* pre_val */); ++ } + } + + return result; +} +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +new file mode 100644 +index 0000000000..b8534c52e7 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,715 @@ ++/* ++ * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+bool frame::is_interpreted_frame_valid(JavaThread* thread) const { -+ assert(is_interpreted_frame(), "Not an interpreted frame"); -+ // These are reasonable sanity checks -+ if (fp() == NULL || (intptr_t(fp()) & (wordSize-1)) != 0) { -+ return false; -+ } -+ if (sp() == NULL || (intptr_t(sp()) & (wordSize-1)) != 0) { -+ return false; -+ } -+ if (fp() + interpreter_frame_initial_sp_offset < sp()) { -+ return false; -+ } -+ // These are hacks to keep us out of trouble. 
-+ // The problem with these is that they mask other problems -+ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above -+ return false; -+ } -+ -+ // do some validation of frame elements -+ -+ // first the method -+ Method* m = *interpreter_frame_method_addr(); -+ // validate the method we'd find in this potential sender -+ if (!Method::is_valid_method(m)) { -+ return false; -+ } ++#include "precompiled.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++#include "gc/shenandoah/shenandoahForwarding.hpp" ++#include "gc/shenandoah/shenandoahHeap.hpp" ++#include "gc/shenandoah/shenandoahHeapRegion.hpp" ++#include "gc/shenandoah/shenandoahRuntime.hpp" ++#include "gc/shenandoah/shenandoahThreadLocalData.hpp" ++#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" ++#endif + -+ // stack frames shouldn't be much larger than max_stack elements -+ // this test requires the use of unextended_sp which is the sp as seen by -+ // the current frame, and not sp which is the "raw" pc which could point -+ // further because of local variables of the callee method inserted after -+ // method arguments -+ if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { -+ return false; -+ } ++#define __ masm-> + -+ // validate bci/bcx -+ address bcp = interpreter_frame_bcp(); -+ if (m->validate_bci_from_bcp(bcp) < 0) { -+ return false; -+ } ++address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; + -+ // validate constantPoolCache* -+ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); -+ if (MetaspaceObj::is_valid(cp) == false) { -+ return false; -+ } ++void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) { ++ if (is_oop) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { + -+ // validate locals -+ address locals = (address) *interpreter_frame_locals_addr(); -+ if (locals > thread->stack_base() || locals < (address) fp()) { -+ return false; -+ } ++ Label done; + -+ // We'd have to be pretty unlucky to be mislead at this point -+ return true; -+} ++ // Avoid calling runtime if count == 0 ++ __ beqz(count, done); + -+BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { -+ assert(is_interpreted_frame(), "interpreted frame expected"); -+ Method* method = interpreter_frame_method(); -+ BasicType type = method->result_type(); ++ // Is GC active? ++ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ assert_different_registers(src, dst, count, t0); + -+ intptr_t* tos_addr = NULL; -+ if (method->is_native()) { -+ tos_addr = (intptr_t*)sp(); -+ if (type == T_FLOAT || type == T_DOUBLE) { -+ // This is because we do a push(ltos) after push(dtos) in generate_native_entry. 
-+ tos_addr += 2 * Interpreter::stackElementWords; -+ } -+ } else { -+ tos_addr = (intptr_t*)interpreter_frame_tos_address(); -+ } ++ __ lbu(t0, gc_state); ++ if (ShenandoahSATBBarrier && dest_uninitialized) { ++ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t0, done); ++ } else { ++ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING); ++ __ beqz(t0, done); ++ } + -+ switch (type) { -+ case T_OBJECT : -+ case T_ARRAY : { -+ oop obj; -+ if (method->is_native()) { -+ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++ __ push_reg(saved_regs, sp); ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), ++ src, dst, count); + } else { -+ oop* obj_p = (oop*)tos_addr; -+ obj = (obj_p == NULL) ? (oop)NULL : *obj_p; ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count); + } -+ assert(Universe::is_in_heap_or_null(obj), "sanity check"); -+ *oop_result = obj; -+ break; -+ } -+ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; -+ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; -+ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; -+ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; -+ case T_INT : value_result->i = *(jint*)tos_addr; break; -+ case T_LONG : value_result->j = *(jlong*)tos_addr; break; -+ case T_FLOAT : { -+ value_result->f = *(jfloat*)tos_addr; -+ break; ++ __ pop_reg(saved_regs, sp); ++ __ bind(done); + } -+ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; -+ case T_VOID : /* Nothing to do */ break; -+ default : ShouldNotReachHere(); + } -+ -+ return type; -+} -+ -+ -+intptr_t* frame::interpreter_frame_tos_at(jint offset) const { -+ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); -+ return &interpreter_frame_tos_address()[index]; +} + -+#ifndef PRODUCT -+ -+#define DESCRIBE_FP_OFFSET(name) \ -+ values.describe(frame_no, fp() + frame::name##_offset, #name) -+ -+void frame::describe_pd(FrameValues& values, int frame_no) { -+ if (is_interpreted_frame()) { -+ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_method); -+ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); -+ DESCRIBE_FP_OFFSET(interpreter_frame_cache); -+ DESCRIBE_FP_OFFSET(interpreter_frame_locals); -+ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ if (ShenandoahSATBBarrier) { ++ satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); + } +} -+#endif -+ -+intptr_t *frame::initial_deoptimization_info() { -+ // Not used on riscv, but we must return something. 
-+ return NULL; -+} + -+intptr_t* frame::real_fp() const { -+ if (_cb != NULL) { -+ // use the frame size if valid -+ int size = _cb->frame_size(); -+ if (size > 0) { -+ return unextended_sp() + size; -+ } -+ } -+ // else rely on fp() -+ assert(!is_compiled_frame(), "unknown compiled frame size"); -+ return fp(); -+} ++void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ assert(thread == xthread, "must be"); + -+#undef DESCRIBE_FP_OFFSET ++ Label done; ++ Label runtime; + -+#ifndef PRODUCT -+// This is a generic constructor which is only used by pns() in debug.cpp. -+frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { -+ init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); -+} ++ assert_different_registers(obj, pre_val, tmp, t0); ++ assert(pre_val != noreg && tmp != noreg, "expecting a register"); + -+#endif ++ Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + -+void JavaFrameAnchor::make_walkable(JavaThread* thread) { -+ // last frame set? -+ if (last_Java_sp() == NULL) { return; } -+ // already walkable? -+ if (walkable()) { return; } -+ vmassert(Thread::current() == (Thread*)thread, "not current thread"); -+ vmassert(last_Java_sp() != NULL, "not called from Java code?"); -+ vmassert(last_Java_pc() == NULL, "already walkable"); -+ capture_last_Java_pc(); -+ vmassert(walkable(), "something went wrong"); -+} ++ // Is marking active? ++ if (in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lwu(tmp, in_progress); ++ } else { ++ assert(in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(tmp, in_progress); ++ } ++ __ beqz(tmp, done); + -+void JavaFrameAnchor::capture_last_Java_pc() { -+ vmassert(_last_Java_sp != NULL, "no last frame set"); -+ vmassert(_last_Java_pc == NULL, "already walkable"); -+ _last_Java_pc = (address)_last_Java_sp[-1]; -+} -diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp -new file mode 100644 -index 00000000000..c06aaa9e391 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/frame_riscv.hpp -@@ -0,0 +1,202 @@ -+/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). 
-+ *
-+ * You should have received a copy of the GNU General Public License version
-+ * 2 along with this work; if not, write to the Free Software Foundation,
-+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-+ *
-+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-+ * or visit www.oracle.com if you need additional information or have any
-+ * questions.
-+ *
-+ */
++ // Do we need to load the previous value?
++ if (obj != noreg) {
++ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
++ }
+
-+#ifndef CPU_RISCV_FRAME_RISCV_HPP
-+#define CPU_RISCV_FRAME_RISCV_HPP
++ // Is the previous value null?
++ __ beqz(pre_val, done);
+
-+#include "runtime/synchronizer.hpp"
++ // Can we store original value in the thread's buffer?
++ // Is index == 0?
++ // (The index field is typed as size_t.)
++ __ ld(tmp, index); // tmp := *index_adr
++ __ beqz(tmp, runtime); // tmp == 0? If yes, goto runtime
+
-+// A frame represents a physical stack frame (an activation). Frames can be
-+// C or Java frames, and the Java frames can be interpreted or compiled.
-+// In contrast, vframes represent source-level activations, so that one physical frame
-+// can correspond to multiple source level frames because of inlining.
-+// A frame is comprised of {pc, fp, sp}
-+// ------------------------------ Asm interpreter ----------------------------------------
-+// Layout of asm interpreter frame:
-+// [expression stack ] * <- sp
++ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize
++ __ sd(tmp, index); // *index_adr := tmp
++ __ ld(t0, buffer);
++ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr
+
-+// [monitors[0] ] \
-+// ... | monitor block size = k
-+// [monitors[k-1] ] /
-+// [frame initial esp ] ( == &monitors[0], initially here) initial_sp_offset
-+// [byte code index/pointr] = bcx() bcx_offset
++ // Record the previous value
++ __ sd(pre_val, Address(tmp, 0));
++ __ j(done);
+
-+// [pointer to locals ] = locals() locals_offset
-+// [constant pool cache ] = cache() cache_offset
++ __ bind(runtime);
++ // save the live input values
++ RegSet saved = RegSet::of(pre_val);
++ if (tosca_live) saved += RegSet::of(x10);
++ if (obj != noreg) saved += RegSet::of(obj);
+
-+// [klass of method ] = mirror() mirror_offset
-+// [padding ]
++ __ push_reg(saved, sp);
+
-+// [methodData ] = mdp() mdx_offset
-+// [Method ] = method() method_offset
++ // Calling the runtime using the regular call_VM_leaf mechanism generates
++ // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
++ // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL.
++ //
++ // If we are generating the pre-barrier without a frame (e.g. in the
++ // intrinsified Reference.get() routine) then rfp might be pointing to
++ // the caller frame and so this check will most likely fail at runtime.
++ //
++ // Expanding the call directly bypasses the generation of the check.
++ // So when we do not have a full interpreter frame on the stack
++ // expand_call should be passed true.
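As a plain C++ illustration of the fast path emitted above (hypothetical names, not part of the patch: SatbQueue and try_enqueue_previous_value are stand-ins for the thread-local SATB queue state and the enqueue attempt; the slow path corresponds to the ShenandoahRuntime::write_ref_field_pre_entry call):

    #include <cstddef>

    // Hypothetical model of the per-thread SATB queue the barrier consults.
    struct SatbQueue {
      bool   active;  // set only while concurrent marking is running
      size_t index;   // byte offset of the next free slot, counting down to 0
      void** buffer;  // thread-local buffer of recorded previous values
    };

    // Returns false when the buffer is full and the runtime slow path
    // (write_ref_field_pre_entry) has to record pre_val instead.
    inline bool try_enqueue_previous_value(SatbQueue& q, void* pre_val) {
      if (!q.active || pre_val == nullptr) {
        return true;                                // nothing to record
      }
      if (q.index == 0) {
        return false;                               // buffer full: take the slow path
      }
      q.index -= sizeof(void*);                     // tmp := tmp - wordSize
      q.buffer[q.index / sizeof(void*)] = pre_val;  // record the previous value
      return true;
    }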
++ if (expand_call) { ++ assert(pre_val != c_rarg1, "smashed arg"); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } + -+// [last esp ] = last_sp() last_sp_offset -+// [old stack pointer ] (sender_sp) sender_sp_offset ++ __ pop_reg(saved, sp); + -+// [old frame pointer ] -+// [return pc ] ++ __ bind(done); ++} + -+// [last sp ] <- fp = link() -+// [oop temp ] (only for native calls) ++void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) { ++ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); + -+// [padding ] (to preserve machine SP alignment) -+// [locals and parameters ] -+// <- sender sp -+// ------------------------------ Asm interpreter ---------------------------------------- ++ Label is_null; ++ __ beqz(dst, is_null); ++ resolve_forward_pointer_not_null(masm, dst, tmp); ++ __ bind(is_null); ++} + -+// ------------------------------ C Frame ------------------------------------------------ -+// Stack: gcc with -fno-omit-frame-pointer -+// . -+// . -+// +-> . -+// | +-----------------+ | -+// | | return address | | -+// | | previous fp ------+ -+// | | saved registers | -+// | | local variables | -+// | | ... | <-+ -+// | +-----------------+ | -+// | | return address | | -+// +------ previous fp | | -+// | saved registers | | -+// | local variables | | -+// +-> | ... | | -+// | +-----------------+ | -+// | | return address | | -+// | | previous fp ------+ -+// | | saved registers | -+// | | local variables | -+// | | ... | <-+ -+// | +-----------------+ | -+// | | return address | | -+// +------ previous fp | | -+// | saved registers | | -+// | local variables | | -+// $fp --> | ... | | -+// +-----------------+ | -+// | return address | | -+// | previous fp ------+ -+// | saved registers | -+// $sp --> | local variables | -+// +-----------------+ -+// ------------------------------ C Frame ------------------------------------------------ ++// IMPORTANT: This must preserve all registers, even t0 and t1, except those explicitely ++// passed in. ++void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) { ++ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); ++ // The below loads the mark word, checks if the lowest two bits are ++ // set, and if so, clear the lowest two bits and copy the result ++ // to dst. Otherwise it leaves dst alone. ++ // Implementing this is surprisingly awkward. I do it here by: ++ // - Inverting the mark word ++ // - Test lowest two bits == 0 ++ // - If so, set the lowest two bits ++ // - Invert the result back, and copy to dst ++ RegSet saved_regs = RegSet::of(t2); ++ bool borrow_reg = (tmp == noreg); ++ if (borrow_reg) { ++ // No free registers available. Make one useful. 
++ tmp = t0; ++ if (tmp == dst) { ++ tmp = t1; ++ } ++ saved_regs += RegSet::of(tmp); ++ } + -+ public: -+ enum { -+ pc_return_offset = 0, -+ // All frames -+ link_offset = -2, -+ return_addr_offset = -1, -+ sender_sp_offset = 0, -+ // Interpreter frames -+ interpreter_frame_oop_temp_offset = 1, // for native calls only ++ assert_different_registers(tmp, dst, t2); ++ __ push_reg(saved_regs, sp); + -+ interpreter_frame_sender_sp_offset = -3, -+ // outgoing sp before a call to an invoked method -+ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, -+ interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, -+ interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1, -+ interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1, -+ interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1, -+ interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1, -+ interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1, -+ interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1, -+ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, ++ Label done; ++ __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); ++ __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 ++ __ andi(t2, tmp, markOopDesc::lock_mask_in_place); ++ __ bnez(t2, done); ++ __ ori(tmp, tmp, markOopDesc::marked_value); ++ __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 ++ __ bind(done); + -+ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, -+ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ __ pop_reg(saved_regs, sp); ++} + -+ // Entry frames -+ // n.b. these values are determined by the layout defined in -+ // stubGenerator for the Java call stub -+ entry_frame_after_call_words = 22, -+ entry_frame_call_wrapper_offset = -10, ++void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, ++ Register dst, ++ Address load_addr) { ++ assert(ShenandoahLoadRefBarrier, "Should be enabled"); ++ assert(dst != t1 && load_addr.base() != t1, "need t1"); ++ assert_different_registers(load_addr.base(), t0, t1); + -+ // we don't need a save area -+ arg_reg_save_area_bytes = 0 -+ }; ++ Label done; ++ __ enter(); ++ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ __ lbu(t1, gc_state); + -+ intptr_t ptr_at(int offset) const { -+ return *ptr_at_addr(offset); -+ } ++ // Check for heap stability ++ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t1, done); + -+ void ptr_at_put(int offset, intptr_t value) { -+ *ptr_at_addr(offset) = value; ++ // use x11 for load address ++ Register result_dst = dst; ++ if (dst == x11) { ++ __ mv(t1, dst); ++ dst = t1; + } + -+ private: -+ // an additional field beyond _sp and _pc: -+ intptr_t* _fp; // frame pointer -+ // The interpreter and adapters will extend the frame of the caller. -+ // Since oopMaps are based on the sp of the caller before extension -+ // we need to know that value. However in order to compute the address -+ // of the return address we need the real "raw" sp. Since sparc already -+ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's -+ // original sp we use that convention. 
-+ -+ intptr_t* _unextended_sp; -+ void adjust_unextended_sp(); -+ -+ intptr_t* ptr_at_addr(int offset) const { -+ return (intptr_t*) addr_at(offset); -+ } ++ // Save x10 and x11, unless it is an output register ++ RegSet saved_regs = RegSet::of(x10, x11) - result_dst; ++ __ push_reg(saved_regs, sp); ++ __ la(x11, load_addr); ++ __ mv(x10, dst); + -+#ifdef ASSERT -+ // Used in frame::sender_for_{interpreter,compiled}_frame -+ static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp); -+#endif ++ __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); + -+ public: -+ // Constructors ++ __ mv(result_dst, x10); ++ __ pop_reg(saved_regs, sp); + -+ frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); ++ __ bind(done); ++ __ leave(); ++} + -+ frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc); ++void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { ++ if (ShenandoahIUBarrier) { ++ __ push_call_clobbered_registers(); + -+ frame(intptr_t* ptr_sp, intptr_t* ptr_fp); ++ satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); + -+ void init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); ++ __ pop_call_clobbered_registers(); ++ } ++} + -+ // accessors for the instance variables -+ // Note: not necessarily the real 'frame pointer' (see real_fp) -+ intptr_t* fp() const { return _fp; } ++void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) { ++ if (ShenandoahLoadRefBarrier) { ++ Label is_null; ++ __ beqz(dst, is_null); ++ load_reference_barrier_not_null(masm, dst, load_addr); ++ __ bind(is_null); ++ } ++} + -+ inline address* sender_pc_addr() const; ++// ++// Arguments: ++// ++// Inputs: ++// src: oop location to load from, might be clobbered ++// ++// Output: ++// dst: oop loaded from src location ++// ++// Kill: ++// x30 (tmp reg) ++// ++// Alias: ++// dst: x30 (might use x30 as temporary output register to avoid clobbering src) ++// ++void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Register dst, ++ Address src, ++ Register tmp1, ++ Register tmp_thread) { ++ // 1: non-reference load, no additional barrier is needed ++ if (!is_reference_type(type)) { ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ return; ++ } + -+ // expression stack tos if we are nested in a java call -+ intptr_t* interpreter_frame_last_sp() const; ++ // 2: load a reference from src location and apply LRB if needed ++ if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { ++ Register result_dst = dst; + -+ // helper to update a map with callee-saved RBP -+ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ // Preserve src location for LRB ++ RegSet saved_regs; ++ if (dst == src.base()) { ++ dst = (src.base() == x28) ? 
x29 : x28; ++ saved_regs = RegSet::of(dst); ++ __ push_reg(saved_regs, sp); ++ } ++ assert_different_registers(dst, src.base()); + -+ // deoptimization support -+ void interpreter_frame_set_last_sp(intptr_t* last_sp); ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + -+ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ load_reference_barrier(masm, dst, src); + -+ // returns the sending frame, without applying any barriers -+ frame sender_raw(RegisterMap* map) const; ++ if (dst != result_dst) { ++ __ mv(result_dst, dst); ++ dst = result_dst; ++ } + -+#endif // CPU_RISCV_FRAME_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -new file mode 100644 -index 00000000000..5ac1bf57f57 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -@@ -0,0 +1,248 @@ -+/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ if (saved_regs.bits() != 0) { ++ __ pop_reg(saved_regs, sp); ++ } ++ } else { ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ } + -+#ifndef CPU_RISCV_FRAME_RISCV_INLINE_HPP -+#define CPU_RISCV_FRAME_RISCV_INLINE_HPP ++ // 3: apply keep-alive barrier if needed ++ if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { ++ __ enter(); ++ __ push_call_clobbered_registers(); ++ satb_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ pop_call_clobbered_registers(); ++ __ leave(); ++ } ++} + -+#include "code/codeCache.hpp" -+#include "code/vmreg.inline.hpp" ++void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool on_oop = is_reference_type(type); ++ if (!on_oop) { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ return; ++ } + -+// Inline functions for RISCV frames: ++ // flatten object address if needed ++ if (dst.offset() == 0) { ++ if (dst.base() != x13) { ++ __ mv(x13, dst.base()); ++ } ++ } else { ++ __ la(x13, dst); ++ } + -+// Constructors: ++ shenandoah_write_barrier_pre(masm, ++ x13 /* obj */, ++ tmp2 /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); + -+inline frame::frame() { -+ _pc = NULL; -+ _sp = NULL; -+ _unextended_sp = NULL; -+ _fp = NULL; -+ _cb = NULL; -+ _deopt_state = unknown; ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); ++ } else { ++ iu_barrier(masm, val, tmp1); ++ // G1 barrier needs uncompressed oop for region cross check. ++ Register new_val = val; ++ if (UseCompressedOops) { ++ new_val = t1; ++ __ mv(new_val, val); ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); ++ } +} + -+static int spin; ++void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ Label done; ++ // Resolve jobject ++ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); + -+inline void frame::init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { -+ intptr_t a = intptr_t(ptr_sp); -+ intptr_t b = intptr_t(ptr_fp); -+ _sp = ptr_sp; -+ _unextended_sp = ptr_sp; -+ _fp = ptr_fp; -+ _pc = pc; -+ assert(pc != NULL, "no pc?"); -+ _cb = CodeCache::find_blob(pc); -+ adjust_unextended_sp(); ++ // Check for null. 
++ __ beqz(obj, done); + -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ _pc = original_pc; -+ _deopt_state = is_deoptimized; -+ } else { -+ _deopt_state = not_deoptimized; -+ } -+} ++ assert(obj != t1, "need t1"); ++ Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); ++ __ lbu(t1, gc_state); + -+inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { -+ init(ptr_sp, ptr_fp, pc); ++ // Check for heap in evacuation phase ++ __ andi(t0, t1, ShenandoahHeap::EVACUATION); ++ __ bnez(t0, slowpath); ++ ++ __ bind(done); +} + -+inline frame::frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc) { -+ intptr_t a = intptr_t(ptr_sp); -+ intptr_t b = intptr_t(ptr_fp); -+ _sp = ptr_sp; -+ _unextended_sp = unextended_sp; -+ _fp = ptr_fp; -+ _pc = pc; -+ assert(pc != NULL, "no pc?"); -+ _cb = CodeCache::find_blob(pc); -+ adjust_unextended_sp(); ++// Special Shenandoah CAS implementation that handles false negatives due ++// to concurrent evacuation. The service is more complex than a ++// traditional CAS operation because the CAS operation is intended to ++// succeed if the reference at addr exactly matches expected or if the ++// reference at addr holds a pointer to a from-space object that has ++// been relocated to the location named by expected. There are two ++// races that must be addressed: ++// a) A parallel thread may mutate the contents of addr so that it points ++// to a different object. In this case, the CAS operation should fail. ++// b) A parallel thread may heal the contents of addr, replacing a ++// from-space pointer held in addr with the to-space pointer ++// representing the new location of the object. ++// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL ++// or it refers to an object that is not being evacuated out of ++// from-space, or it refers to the to-space version of an object that ++// is being evacuated out of from-space. ++// ++// By default the value held in the result register following execution ++// of the generated code sequence is 0 to indicate failure of CAS, ++// non-zero to indicate success. If is_cae, the result is the value most ++// recently fetched from addr rather than a boolean success indicator. ++// ++// Clobbers t0, t1 ++void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, ++ Register addr, ++ Register expected, ++ Register new_val, ++ Assembler::Aqrl acquire, ++ Assembler::Aqrl release, ++ bool is_cae, ++ Register result) { ++ bool is_narrow = UseCompressedOops; ++ Assembler::operand_size size = is_narrow ? Assembler::uint32 : Assembler::int64; + -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ _pc = original_pc; -+ assert(_cb->as_compiled_method()->insts_contains_inclusive(_pc), -+ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); -+ _deopt_state = is_deoptimized; -+ } else { -+ _deopt_state = not_deoptimized; ++ assert_different_registers(addr, expected, t0, t1); ++ assert_different_registers(addr, new_val, t0, t1); ++ ++ Label retry, success, fail, done; ++ ++ __ bind(retry); ++ ++ // Step1: Try to CAS. ++ __ cmpxchg(addr, expected, new_val, size, acquire, release, /* result */ t1); ++ ++ // If success, then we are done. ++ __ beq(expected, t1, success); ++ ++ // Step2: CAS failed, check the forwared pointer. 
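The retry protocol documented above can be sketched in ordinary C++ before the emitted sequence continues below. This is only an illustration: std::atomic stands in for the LR/SC-based cmpxchg, and resolve_forward is a hypothetical stand-in for decoding a forwarding pointer.

    #include <atomic>

    // Assumed helper: returns the to-space copy if obj carries a forwarding
    // pointer, otherwise returns obj itself.
    void* resolve_forward(void* obj);

    // Returns 'expected' on success, or the conflicting value seen at addr on
    // failure (roughly the is_cae result); callers wanting a boolean result
    // compare the return value against 'expected'.
    void* cas_oop_with_forwarding(std::atomic<void*>& addr, void* expected, void* new_val) {
      for (;;) {
        void* witness = expected;
        if (addr.compare_exchange_strong(witness, new_val)) {
          return expected;                      // Step 1: plain CAS succeeded
        }
        // Step 2: CAS failed; the location may hold a stale from-space pointer
        // to the very object named by 'expected'.
        if (resolve_forward(witness) != expected) {
          return witness;                       // a genuinely different object: fail
        }
        // Step 3: CAS again, expecting the stale value we just observed.
        void* stale = witness;
        if (addr.compare_exchange_strong(stale, new_val)) {
          return expected;                      // success despite the healing race
        }
        // The location changed again underneath us; start over from Step 1.
      }
    }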
++ __ mv(t0, t1); ++ ++ if (is_narrow) { ++ __ decode_heap_oop(t0, t0); + } -+} ++ resolve_forward_pointer(masm, t0); + -+inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp) { -+ intptr_t a = intptr_t(ptr_sp); -+ intptr_t b = intptr_t(ptr_fp); -+ _sp = ptr_sp; -+ _unextended_sp = ptr_sp; -+ _fp = ptr_fp; -+ _pc = (address)(ptr_sp[-1]); ++ __ encode_heap_oop(t0, t0); + -+ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace -+ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly -+ // unlucky the junk value could be to a zombied method and we'll die on the -+ // find_blob call. This is also why we can have no asserts on the validity -+ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler -+ // -> pd_last_frame should use a specialized version of pd_last_frame which could -+ // call a specilaized frame constructor instead of this one. -+ // Then we could use the assert below. However this assert is of somewhat dubious -+ // value. ++ // Report failure when the forwarded oop was not expected. ++ __ bne(t0, expected, fail); + -+ _cb = CodeCache::find_blob(_pc); -+ adjust_unextended_sp(); ++ // Step 3: CAS again using the forwarded oop. ++ __ cmpxchg(addr, t1, new_val, size, acquire, release, /* result */ t0); + -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ _pc = original_pc; -+ _deopt_state = is_deoptimized; ++ // Retry when failed. ++ __ bne(t0, t1, retry); ++ ++ __ bind(success); ++ if (is_cae) { ++ __ mv(result, expected); + } else { -+ _deopt_state = not_deoptimized; ++ __ addi(result, zr, 1); + } -+} ++ __ j(done); + -+// Accessors ++ __ bind(fail); ++ if (is_cae) { ++ __ mv(result, t0); ++ } else { ++ __ mv(result, zr); ++ } + -+inline bool frame::equal(frame other) const { -+ bool ret = sp() == other.sp() && -+ unextended_sp() == other.unextended_sp() && -+ fp() == other.fp() && -+ pc() == other.pc(); -+ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); -+ return ret; ++ __ bind(done); +} + -+// Return unique id for this frame. The id must have a value where we can distinguish -+// identity and younger/older relationship. NULL represents an invalid (incomparable) -+// frame. -+inline intptr_t* frame::id(void) const { return unextended_sp(); } ++#undef __ + -+// Return true if the frame is older (less recent activation) than the frame represented by id -+inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); -+ return this->id() > id ; } ++#ifdef COMPILER1 + -+inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } ++#define __ ce->masm()-> + -+inline intptr_t* frame::link_or_null() const { -+ intptr_t** ptr = (intptr_t **)addr_at(link_offset); -+ return os::is_readable_pointer(ptr) ? *ptr : NULL; -+} ++void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { ++ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. 
++ __ bind(*stub->entry()); + -+inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ assert(stub->pre_val()->is_register(), "Precondition."); + -+// Return address -+inline address* frame::sender_pc_addr() const { return (address*) addr_at(return_addr_offset); } -+inline address frame::sender_pc() const { return *sender_pc_addr(); } -+inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); } ++ Register pre_val_reg = stub->pre_val()->as_register(); + -+inline intptr_t** frame::interpreter_frame_locals_addr() const { -+ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); ++ } ++ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); ++ __ j(*stub->continuation()); +} + -+inline intptr_t* frame::interpreter_frame_last_sp() const { -+ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); -+} ++void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ++ ShenandoahLoadReferenceBarrierStub* stub) { ++ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ __ bind(*stub->entry()); + -+inline intptr_t* frame::interpreter_frame_bcp_addr() const { -+ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); -+} ++ Register obj = stub->obj()->as_register(); ++ Register res = stub->result()->as_register(); ++ Register addr = stub->addr()->as_pointer_register(); ++ Register tmp1 = stub->tmp1()->as_register(); ++ Register tmp2 = stub->tmp2()->as_register(); + -+inline intptr_t* frame::interpreter_frame_mdp_addr() const { -+ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); -+} ++ assert(res == x10, "result must arrive in x10"); ++ assert_different_registers(tmp1, tmp2, t0); + ++ if (res != obj) { ++ __ mv(res, obj); ++ } + -+// Constant pool cache ++ // Check for null. ++ __ beqz(res, *stub->continuation(), /* is_far */ true); + -+inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { -+ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); -+} ++ // Check for object in cset. ++ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(t0, tmp2, tmp1); ++ __ lb(tmp2, Address(t0)); ++ __ beqz(tmp2, *stub->continuation(), /* is_far */ true); + -+// Method ++ // Check if object is already forwarded. ++ Label slow_path; ++ __ ld(tmp1, Address(res, oopDesc::mark_offset_in_bytes())); ++ __ xori(tmp1, tmp1, -1); ++ __ andi(t0, tmp1, markOopDesc::lock_mask_in_place); ++ __ bnez(t0, slow_path); + -+inline Method** frame::interpreter_frame_method_addr() const { -+ return (Method**)addr_at(interpreter_frame_method_offset); -+} ++ // Decode forwarded object. 
++ __ ori(tmp1, tmp1, markOopDesc::marked_value); ++ __ xori(res, tmp1, -1); ++ __ j(*stub->continuation()); + -+// Mirror ++ __ bind(slow_path); ++ ce->store_parameter(res, 0); ++ ce->store_parameter(addr, 1); ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin())); + -+inline oop* frame::interpreter_frame_mirror_addr() const { -+ return (oop*)addr_at(interpreter_frame_mirror_offset); ++ __ j(*stub->continuation()); +} + -+// top of expression stack -+inline intptr_t* frame::interpreter_frame_tos_address() const { -+ intptr_t* last_sp = interpreter_frame_last_sp(); -+ if (last_sp == NULL) { -+ return sp(); -+ } else { -+ // sp() may have been extended or shrunk by an adapter. At least -+ // check that we don't fall behind the legal region. -+ // For top deoptimized frame last_sp == interpreter_frame_monitor_end. -+ assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); -+ return last_sp; -+ } -+} ++#undef __ + -+inline oop* frame::interpreter_frame_temp_oop_addr() const { -+ return (oop *)(fp() + interpreter_frame_oop_temp_offset); -+} ++#define __ sasm-> + -+inline int frame::interpreter_frame_monitor_size() { -+ return BasicObjectLock::size(); -+} ++void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("shenandoah_pre_barrier", false); + ++ // arg0 : previous value of memory + -+// expression stack -+// (the max_stack arguments are used by the GC; see class FrameClosure) ++ BarrierSet* bs = BarrierSet::barrier_set(); + -+inline intptr_t* frame::interpreter_frame_expression_stack() const { -+ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); -+ return monitor_end-1; -+} ++ const Register pre_val = x10; ++ const Register thread = xthread; ++ const Register tmp = t0; + ++ Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + -+// Entry frames ++ Label done; ++ Label runtime; + -+inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { -+ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++ // Is marking still active? ++ Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ __ lb(tmp, gc_state); ++ __ andi(tmp, tmp, ShenandoahHeap::MARKING); ++ __ beqz(tmp, done); ++ ++ // Can we store original value in the thread's buffer? 
++ __ ld(tmp, queue_index); ++ __ beqz(tmp, runtime); ++ ++ __ sub(tmp, tmp, wordSize); ++ __ sd(tmp, queue_index); ++ __ ld(t1, buffer); ++ __ add(tmp, tmp, t1); ++ __ load_parameter(0, t1); ++ __ sd(t1, Address(tmp, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ ++ __ epilogue(); +} + ++void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("shenandoah_load_reference_barrier", false); ++ // arg0 : object to be resolved + -+// Compiled frames -+PRAGMA_DIAG_PUSH -+PRAGMA_NONNULL_IGNORED -+inline oop frame::saved_oop_result(RegisterMap* map) const { -+ oop* result_adr = (oop *)map->location(x10->as_VMReg()); -+ guarantee(result_adr != NULL, "bad register save location"); -+ return (*result_adr); ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, x10); ++ __ load_parameter(1, x11); ++ ++ if (UseCompressedOops) { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); ++ } else { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); ++ } ++ __ jalr(ra); ++ __ mv(t0, x10); ++ __ pop_call_clobbered_registers(); ++ __ mv(x10, t0); ++ ++ __ epilogue(); +} + -+inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { -+ oop* result_adr = (oop *)map->location(x10->as_VMReg()); -+ guarantee(result_adr != NULL, "bad register save location"); -+ *result_adr = obj; ++#undef __ ++ ++#endif // COMPILER1 ++ ++address ShenandoahBarrierSetAssembler::shenandoah_lrb() { ++ assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); ++ return _shenandoah_lrb; +} -+PRAGMA_DIAG_POP + -+#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -new file mode 100644 -index 00000000000..1c46b3947d3 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -0,0 +1,484 @@ -+/* -+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++#define __ cgen->assembler()-> ++ ++// Shenandoah load reference barrier. ++// ++// Input: ++// x10: OOP to evacuate. Not null. ++// x11: load address ++// ++// Output: ++// x10: Pointer to evacuated OOP. ++// ++// Trash t0 t1 Preserve everything else. 
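Before the generated stub itself, an illustrative C++ view of the fast-path filter it implements; cset_bitmap and region_size_bytes_shift are hypothetical stand-ins for ShenandoahHeap::in_cset_fast_test_addr() and ShenandoahHeapRegion::region_size_bytes_shift_jint().

    #include <cstdint>

    extern const uint8_t* cset_bitmap;          // one byte per heap region
    extern unsigned region_size_bytes_shift;    // log2 of the region size in bytes

    // Returns true when the oop may point into the collection set and the
    // ShenandoahRuntime::load_reference_barrier* slow path has to run.
    inline bool lrb_needs_slow_path(const void* oop) {
      uintptr_t region_index = reinterpret_cast<uintptr_t>(oop) >> region_size_bytes_shift;
      return (cset_bitmap[region_index] & 1) != 0;  // lbu + andi 1 + bnez slow_path
    }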
++address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { ++ __ align(6); ++ StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); ++ address start = __ pc(); ++ ++ Label slow_path; ++ __ mv(t1, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(t1, t1, t0); ++ __ lbu(t1, Address(t1, 0)); ++ __ andi(t0, t1, 1); ++ __ bnez(t0, slow_path); ++ __ ret(); ++ ++ __ bind(slow_path); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ push_call_clobbered_registers(); ++ ++ if (UseCompressedOops) { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); ++ } else { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); ++ } ++ __ jalr(ra); ++ __ mv(t0, x10); ++ __ pop_call_clobbered_registers(); ++ __ mv(x10, t0); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(); ++ ++ return start; ++} ++ ++#undef __ ++ ++void ShenandoahBarrierSetAssembler::barrier_stubs_init() { ++ if (ShenandoahLoadRefBarrier) { ++ int stub_code_size = 2048; ++ ResourceMark rm; ++ BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); ++ CodeBuffer buf(bb); ++ StubCodeGenerator cgen(&buf); ++ _shenandoah_lrb = generate_shenandoah_lrb(&cgen); ++ } ++} +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp +new file mode 100644 +index 0000000000..5d75035e9d +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,97 @@ ++/* ++ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -15412,474 +15178,545 @@ index 00000000000..1c46b3947d3 + * + */ + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/g1/g1BarrierSet.hpp" -+#include "gc/g1/g1BarrierSetAssembler.hpp" -+#include "gc/g1/g1BarrierSetRuntime.hpp" -+#include "gc/g1/g1CardTable.hpp" -+#include "gc/g1/g1ThreadLocalData.hpp" -+#include "gc/g1/heapRegion.hpp" -+#include "gc/shared/collectedHeap.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/thread.hpp" ++#ifndef CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#ifdef COMPILER1 -+#include "c1/c1_LIRAssembler.hpp" -+#include "c1/c1_MacroAssembler.hpp" -+#include "gc/g1/c1/g1BarrierSetC1.hpp" ++class LIR_Assembler; ++class ShenandoahPreBarrierStub; ++class ShenandoahLoadReferenceBarrierStub; ++class StubAssembler; +#endif ++class StubCodeGenerator; + -+#define __ masm-> ++class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { ++private: + -+void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register addr, Register count, RegSet saved_regs) { -+ assert_cond(masm != NULL); -+ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; -+ if (!dest_uninitialized) { -+ Label done; -+ Address in_progress(xthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ static address _shenandoah_lrb; + -+ // Is marking active? -+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { -+ __ lwu(t0, in_progress); -+ } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(t0, in_progress); -+ } -+ __ beqz(t0, done); ++ void satb_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ void shenandoah_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); + -+ __ push_reg(saved_regs, sp); -+ if (count == c_rarg0) { -+ if (addr == c_rarg1) { -+ // exactly backwards!! 
-+ __ mv(t0, c_rarg0); -+ __ mv(c_rarg0, c_rarg1); -+ __ mv(c_rarg1, t0); -+ } else { -+ __ mv(c_rarg1, count); -+ __ mv(c_rarg0, addr); -+ } -+ } else { -+ __ mv(c_rarg0, addr); -+ __ mv(c_rarg1, count); -+ } -+ if (UseCompressedOops) { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); -+ } -+ __ pop_reg(saved_regs, sp); ++ void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); ++ void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); ++ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr); ++ void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr); + -+ __ bind(done); -+ } -+} ++ address generate_shenandoah_lrb(StubCodeGenerator* cgen); + -+void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs) { -+ assert_cond(masm != NULL); -+ __ push_reg(saved_regs, sp); -+ assert_different_registers(start, count, tmp); -+ assert_different_registers(c_rarg0, count); -+ __ mv(c_rarg0, start); -+ __ mv(c_rarg1, count); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); -+ __ pop_reg(saved_regs, sp); -+} ++public: + -+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call) { -+ // If expand_call is true then we expand the call_VM_leaf macro -+ // directly to skip generating the check by -+ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ static address shenandoah_lrb(); + -+ assert_cond(masm != NULL); -+ assert(thread == xthread, "must be"); ++ void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); + -+ Label done; -+ Label runtime; ++#ifdef COMPILER1 ++ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); ++ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); ++#endif + -+ assert_different_registers(obj, pre_val, tmp, t0); -+ assert(pre_val != noreg && tmp != noreg, "expecting a register"); ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs); + -+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); -+ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); + -+ // Is marking active? 
-+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width -+ __ lwu(tmp, in_progress); -+ } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(tmp, in_progress); -+ } -+ __ beqz(tmp, done); ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); + -+ // Do we need to load the previous value? -+ if (obj != noreg) { -+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); -+ } ++ virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); + -+ // Is the previous value null? -+ __ beqz(pre_val, done); ++ virtual void barrier_stubs_init(); ++}; + -+ // Can we store original value in the thread's buffer? -+ // Is index == 0? -+ // (The index field is typed as size_t.) ++#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +new file mode 100644 +index 0000000000..bab407a8b7 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +@@ -0,0 +1,197 @@ ++// ++// Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// + -+ __ ld(tmp, index); // tmp := *index_adr -+ __ beqz(tmp, runtime); // tmp == 0? 
-+ // If yes, goto runtime ++source_hpp %{ ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++%} + -+ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize -+ __ sd(tmp, index); // *index_adr := tmp -+ __ ld(t0, buffer); -+ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr ++instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ // Record the previous value -+ __ sd(pre_val, Address(tmp, 0)); -+ __ j(done); ++ effect(TEMP tmp, KILL cr); + -+ __ bind(runtime); -+ // save the live input values -+ RegSet saved = RegSet::of(pre_val); -+ if (tosca_live) { saved += RegSet::of(x10); } -+ if (obj != noreg) { saved += RegSet::of(obj); } ++ format %{ ++ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapP_shenandoah" ++ %} + -+ __ push_reg(saved, sp); ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ if (expand_call) { -+ assert(pre_val != c_rarg1, "smashed arg"); -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); -+ } ++ ins_pipe(pipe_slow); ++%} + -+ __ pop_reg(saved, sp); ++instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ __ bind(done); ++ effect(TEMP tmp, KILL cr); + -+} ++ format %{ ++ "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapN_shenandoah" ++ %} + -+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, -+ Register store_addr, -+ Register new_val, -+ Register thread, -+ Register tmp, -+ Register tmp2) { -+ assert_cond(masm != NULL); -+ assert(thread == xthread, "must be"); -+ assert_different_registers(store_addr, new_val, thread, tmp, tmp2, -+ t0); -+ assert(store_addr != noreg && new_val != noreg && tmp != noreg && -+ tmp2 != noreg, "expecting a register"); ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ins_pipe(pipe_slow); ++%} + -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); ++instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ Label done; -+ Label runtime; ++ effect(TEMP tmp, KILL cr); + -+ // Does store cross heap regions? ++ format %{ ++ "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapPAcq_shenandoah" ++ %} + -+ __ xorr(tmp, store_addr, new_val); -+ __ srli(tmp, tmp, HeapRegion::LogOfHRGrainBytes); -+ __ beqz(tmp, done); ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ // crosses regions, storing NULL? ++ ins_pipe(pipe_slow); ++%} + -+ __ beqz(new_val, done); ++instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ // storing region crossing non-NULL, is card already dirty? ++ effect(TEMP tmp, KILL cr); + -+ ExternalAddress cardtable((address) ct->byte_map_base()); -+ const Register card_addr = tmp; ++ format %{ ++ "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapNAcq_shenandoah" ++ %} + -+ __ srli(card_addr, store_addr, CardTable::card_shift()); ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ // get the address of the card -+ __ load_byte_map_base(tmp2); -+ __ add(card_addr, card_addr, tmp2); -+ __ lbu(tmp2, Address(card_addr)); -+ __ mv(t0, (int)G1CardTable::g1_young_card_val()); -+ __ beq(tmp2, t0, done); ++ ins_pipe(pipe_slow); ++%} + -+ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); + -+ __ membar(MacroAssembler::StoreLoad); ++ format %{ ++ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah" ++ %} + -+ __ lbu(tmp2, Address(card_addr)); -+ __ beqz(tmp2, done); ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ true /* is_cae */, $res$$Register); ++ %} + -+ // storing a region crossing, non-NULL oop, card is clean. -+ // dirty card and log. ++ ins_pipe(pipe_slow); ++%} + -+ __ sb(zr, Address(card_addr)); ++instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ __ ld(t0, queue_index); -+ __ beqz(t0, runtime); -+ __ sub(t0, t0, wordSize); -+ __ sd(t0, queue_index); ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah" ++ %} + -+ __ ld(tmp2, buffer); -+ __ add(t0, tmp2, t0); -+ __ sd(card_addr, Address(t0, 0)); -+ __ j(done); ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ true /* is_cae */, $res$$Register); ++ %} + -+ __ bind(runtime); -+ // save the live input values -+ RegSet saved = RegSet::of(store_addr); -+ __ push_reg(saved, sp); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); -+ __ pop_reg(saved, sp); ++ ins_pipe(pipe_slow); ++%} + -+ __ bind(done); -+} ++instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread) { -+ assert_cond(masm != NULL); -+ bool on_oop = is_reference_type(type); -+ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; -+ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; -+ bool on_reference = on_weak || on_phantom; -+ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -+ if (on_oop && on_reference) { -+ // RA is live. It must be saved around calls. -+ __ enter(); // barrier may call runtime -+ // Generate the G1 pre-barrier code to log the value of -+ // the referent field in an SATB buffer. -+ g1_write_barrier_pre(masm /* masm */, -+ noreg /* obj */, -+ dst /* pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ true /* tosca_live */, -+ true /* expand_call */); -+ __ leave(); -+ } -+} ++ effect(TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah" ++ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" ++ %} + -+void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2) { -+ assert_cond(masm != NULL); -+ // flatten object address if needed -+ if (dst.offset() == 0) { -+ if (dst.base() != x13) { -+ __ mv(x13, dst.base()); -+ } -+ } else { -+ __ la(x13, dst); -+ } ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ g1_write_barrier_pre(masm, -+ x13 /* obj */, -+ tmp2 /* pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ val != noreg /* tosca_live */, -+ false /* expand_call */); ++ ins_pipe(pipe_slow); ++%} + -+ if (val == noreg) { -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); -+ } else { -+ // G1 barrier needs uncompressed oop for region cross check. 
-+ Register new_val = val; -+ if (UseCompressedOops) { -+ new_val = t1; -+ __ mv(new_val, val); -+ } -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); -+ g1_write_barrier_post(masm, -+ x13 /* store_adr */, -+ new_val /* new_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ tmp2 /* tmp2 */); -+ } -+} ++instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+#ifdef COMPILER1 -+ -+#undef __ -+#define __ ce->masm()-> ++ effect(TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah" ++ %} + -+void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { -+ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ // At this point we know that marking is in progress. -+ // If do_load() is true then we have to emit the -+ // load of the previous value; otherwise it has already -+ // been loaded into _pre_val. -+ __ bind(*stub->entry()); ++ ins_pipe(pipe_slow); ++%} +diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +new file mode 100644 +index 0000000000..d6ce8da07b +--- /dev/null ++++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +@@ -0,0 +1,46 @@ ++/* ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ assert(stub->pre_val()->is_register(), "Precondition."); ++#ifndef CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP ++#define CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP + -+ Register pre_val_reg = stub->pre_val()->as_register(); ++const int StackAlignmentInBytes = 16; + -+ if (stub->do_load()) { -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); -+ } -+ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); -+ ce->store_parameter(stub->pre_val()->as_register(), 0); -+ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); -+ __ j(*stub->continuation()); -+} ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are extended to 64 bits. ++const bool CCallingConventionRequiresIntsAsLongs = false; + -+void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { -+ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); -+ __ bind(*stub->entry()); -+ assert(stub->addr()->is_register(), "Precondition"); -+ assert(stub->new_val()->is_register(), "Precondition"); -+ Register new_val_reg = stub->new_val()->as_register(); -+ __ beqz(new_val_reg, *stub->continuation(), /* is_far */ true); -+ ce->store_parameter(stub->addr()->as_pointer_register(), 0); -+ __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin())); -+ __ j(*stub->continuation()); -+} ++// To be safe, we deoptimize when we come across an access that needs ++// patching. This is similar to what is done on aarch64. ++#define DEOPTIMIZE_WHEN_PATCHING + -+#undef __ ++#define SUPPORTS_NATIVE_CX8 + -+#define __ sasm-> ++#define SUPPORT_RESERVED_STACK_AREA + -+void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { -+ __ prologue("g1_pre_barrier", false); ++#define THREAD_LOCAL_POLL + -+ BarrierSet* bs = BarrierSet::barrier_set(); ++#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +new file mode 100644 +index 0000000000..90db2f4460 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -0,0 +1,111 @@ ++/* ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
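// --------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): StackAlignmentInBytes = 16
// above reflects the RISC-V psABI rule that sp stays 16-byte aligned at call
// boundaries; the interpreter re-establishes it with andi(sp, esp, -16) later
// in this patch. A minimal sketch of that align-down computation:
#include <cassert>
#include <cstdint>

constexpr std::uintptr_t kStackAlignment = 16;

// Round an address down to the next 16-byte boundary, as andi(sp, esp, -16) does.
constexpr std::uintptr_t align_down(std::uintptr_t sp) {
  return sp & ~(kStackAlignment - 1);
}

int main() {
  assert(align_down(0x7ffffff8) == 0x7ffffff0);
  assert(align_down(0x7ffffff0) == 0x7ffffff0);  // already aligned: unchanged
  return 0;
}
// --------------------------------------------------------------------------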
++ * ++ */ + -+ // arg0 : previous value of memory -+ const Register pre_val = x10; -+ const Register thread = xthread; -+ const Register tmp = t0; ++#ifndef CPU_RISCV_GLOBALS_RISCV_HPP ++#define CPU_RISCV_GLOBALS_RISCV_HPP + -+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); -+ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" + -+ Label done; -+ Label runtime; ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) + -+ // Is marking still active? -+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width -+ __ lwu(tmp, in_progress); -+ } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(tmp, in_progress); -+ } -+ __ beqz(tmp, done); ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this + -+ // Can we store original value in the thread's buffer? -+ __ ld(tmp, queue_index); -+ __ beqz(tmp, runtime); ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast + -+ __ sub(tmp, tmp, wordSize); -+ __ sd(tmp, queue_index); -+ __ ld(t1, buffer); -+ __ add(tmp, tmp, t1); -+ __ load_parameter(0, t1); -+ __ sd(t1, Address(tmp, 0)); -+ __ j(done); ++define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. ++define_pd_global(intx, CodeEntryAlignment, 64); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); + -+ __ bind(runtime); -+ __ push_call_clobbered_registers(); -+ __ load_parameter(0, pre_val); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); -+ __ pop_call_clobbered_registers(); -+ __ bind(done); ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the ++// stack if compiled for unix and LP64. To pass stack overflow tests we need ++// 20 shadow pages. 
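// --------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the comment above motivates the
// DEFAULT_STACK_SHADOW_PAGES value defined next. Assuming the usual 4 KiB base
// page size (the real value comes from the OS at runtime), 20 shadow pages
// give 80 KiB of headroom, which covers the 64 KiB native buffer mentioned in
// the comment:
#include <cstddef>

constexpr std::size_t kPageSize     = 4 * 1024;   // assumed base page size
constexpr std::size_t kShadowPages  = 20;         // product-build default below
constexpr std::size_t kSocketBuffer = 64 * 1024;  // buffer used by socketWrite0

static_assert(kShadowPages * kPageSize > kSocketBuffer,
              "shadow zone must cover the 64 KiB native buffer");

int main() { return 0; }
// --------------------------------------------------------------------------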
++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+5)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) + -+ __ epilogue(); -+} ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) + -+void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { -+ __ prologue("g1_post_barrier", false); ++define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); ++define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); + -+ // arg0 : store_address -+ Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); + -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); ++define_pd_global(bool, UseMembar, true); + -+ Label done; -+ Label runtime; ++define_pd_global(bool, PreserveFramePointer, false); + -+ // At this point we know new_value is non-NULL and the new_value crosses regions. -+ // Must check to see if card is already dirty -+ const Register thread = xthread; ++// GC Ergo Flags ++define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread + -+ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++define_pd_global(uintx, TypeProfileLevel, 111); + -+ const Register card_offset = t1; -+ // RA is free here, so we can use it to hold the byte_map_base. -+ const Register byte_map_base = ra; ++define_pd_global(bool, CompactStrings, true); + -+ assert_different_registers(card_offset, byte_map_base, t0); ++// Clear short arrays bigger than one word in an arch-specific way ++define_pd_global(intx, InitArrayShortSize, BytesPerLong); + -+ __ load_parameter(0, card_offset); -+ __ srli(card_offset, card_offset, CardTable::card_shift()); -+ __ load_byte_map_base(byte_map_base); ++define_pd_global(bool, ThreadLocalHandshakes, true); + -+ // Convert card offset into an address in card_addr -+ Register card_addr = card_offset; -+ __ add(card_addr, byte_map_base, card_addr); ++define_pd_global(intx, InlineSmallCode, 1000); + -+ __ lbu(t0, Address(card_addr, 0)); -+ __ sub(t0, t0, (int)G1CardTable::g1_young_card_val()); -+ __ beqz(t0, done); ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ diagnostic, \ ++ experimental, \ ++ notproduct, \ ++ range, \ ++ constraint, \ ++ writeable) \ ++ \ ++ product(bool, NearCpool, true, \ ++ "constant pool is close to instructions") \ ++ product(intx, BlockZeroingLowLimit, 256, \ ++ "Minimum size in bytes when block zeroing will be used") \ ++ range(1, max_jint) \ ++ product(bool, TraceTraps, false, "Trace all traps the signal handler") \ ++ /* For now we're going to be safe and add the I/O bits to userspace fences. 
*/ \ ++ product(bool, UseConservativeFence, true, \ ++ "Extend i for r and o for w in the pred/succ flags of fence") \ ++ product(bool, AvoidUnalignedAccesses, true, \ ++ "Avoid generating unaligned memory accesses") \ ++ experimental(bool, UseRVV, false, "Use RVV instructions") \ ++ experimental(bool, UseZba, false, "Use Zba instructions") \ ++ experimental(bool, UseZbb, false, "Use Zbb instructions") \ ++ experimental(bool, UseZbs, false, "Use Zbs instructions") \ ++ experimental(bool, UseRVC, false, "Use RVC instructions") + -+ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++#endif // CPU_RISCV_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp +new file mode 100644 +index 0000000000..cc93103dc5 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp +@@ -0,0 +1,79 @@ ++/* ++ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ __ membar(MacroAssembler::StoreLoad); -+ __ lbu(t0, Address(card_addr, 0)); -+ __ beqz(t0, done); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/oop.inline.hpp" + -+ // storing region crossing non-NULL, card is clean. -+ // dirty card and log. -+ __ sb(zr, Address(card_addr, 0)); ++int InlineCacheBuffer::ic_stub_code_size() { ++ // 6: auipc + ld + auipc + jalr + address(2 * instruction_size) ++ // 5: auipc + ld + j + address(2 * instruction_size) ++ return (MacroAssembler::far_branches() ? 
6 : 5) * NativeInstruction::instruction_size; ++} + -+ __ ld(t0, queue_index); -+ __ beqz(t0, runtime); -+ __ sub(t0, t0, wordSize); -+ __ sd(t0, queue_index); ++#define __ masm-> + -+ // Reuse RA to hold buffer_addr -+ const Register buffer_addr = ra; ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { ++ assert_cond(code_begin != NULL && entry_point != NULL); ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // Note: even though the code contains an embedded value, we do not need reloc info ++ // because ++ // (1) the value is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear + -+ __ ld(buffer_addr, buffer); -+ __ add(t0, buffer_addr, t0); -+ __ sd(card_addr, Address(t0, 0)); -+ __ j(done); ++ address start = __ pc(); ++ Label l; ++ __ ld(t1, l); ++ __ far_jump(ExternalAddress(entry_point)); ++ __ align(wordSize); ++ __ bind(l); ++ __ emit_int64((intptr_t)cached_value); ++ // Only need to invalidate the 1st two instructions - not the whole ic stub ++ ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size()); ++ assert(__ pc() - start == ic_stub_code_size(), "must be"); ++} + -+ __ bind(runtime); -+ __ push_call_clobbered_registers(); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); -+ __ pop_call_clobbered_registers(); -+ __ bind(done); -+ __ epilogue(); ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeJump* jump = nativeJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); +} + -+#undef __ + -+#endif // COMPILER1 -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // The word containing the cached value is at the end of this IC buffer ++ uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize); ++ void* o = (void*)*p; ++ return o; ++} +diff --git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp new file mode 100644 -index 00000000000..37bc183f39c +index 0000000000..d615dcfb9e --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp -@@ -0,0 +1,78 @@ ++++ b/src/hotspot/cpu/riscv/icache_riscv.cpp +@@ -0,0 +1,68 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2023, Rivos Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
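// --------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the IC stub assembled above is
// a handful of instructions followed by the 64-bit cached value embedded at
// the very end of the stub, which is why ic_buffer_cached_value() simply reads
// the last word of the buffer. The buffer size and types below are assumptions
// for the example; only the tail-of-buffer convention is the point.
#include <cassert>
#include <cstdint>
#include <cstring>

constexpr std::size_t kWordSize = sizeof(std::uint64_t);

// Embed a value in the last word of a stub-sized buffer, as the stub does.
void write_tail_value(unsigned char* code_begin, std::size_t stub_size, std::uint64_t value) {
  std::memcpy(code_begin + stub_size - kWordSize, &value, kWordSize);
}

// Recover it the same way ic_buffer_cached_value() does: end minus one word.
std::uint64_t read_tail_value(const unsigned char* code_begin, std::size_t stub_size) {
  std::uint64_t value;
  std::memcpy(&value, code_begin + stub_size - kWordSize, kWordSize);
  return value;
}

int main() {
  unsigned char stub[24] = {};  // instruction bytes + trailing 8-byte value slot
  write_tail_value(stub, sizeof(stub), 0xdeadbeefULL);
  assert(read_tail_value(stub, sizeof(stub)) == 0xdeadbeefULL);
  return 0;
}
// --------------------------------------------------------------------------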
+ * + * This code is free software; you can redistribute it and/or modify it @@ -15902,68 +15739,57 @@ index 00000000000..37bc183f39c + * + */ + -+#ifndef CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP -+ ++#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" -+#include "gc/shared/modRefBarrierSetAssembler.hpp" -+#include "utilities/macros.hpp" ++#include "riscv_flush_icache.hpp" ++#include "runtime/java.hpp" ++#include "runtime/icache.hpp" + -+#ifdef COMPILER1 -+class LIR_Assembler; -+#endif -+class StubAssembler; -+class G1PreBarrierStub; -+class G1PostBarrierStub; ++#define __ _masm-> + -+class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { -+protected: -+ void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register addr, Register count, RegSet saved_regs); -+ void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs); ++static int icache_flush(address addr, int lines, int magic) { ++ // To make a store to instruction memory visible to all RISC-V harts, ++ // the writing hart has to execute a data FENCE before requesting that ++ // all remote RISC-V harts execute a FENCE.I. + -+ void g1_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call); ++ // We need to make sure stores happens before the I/D cache synchronization. ++ __asm__ volatile("fence rw, rw" : : : "memory"); + -+ void g1_write_barrier_post(MacroAssembler* masm, -+ Register store_addr, -+ Register new_val, -+ Register thread, -+ Register tmp, -+ Register tmp2); ++ RiscvFlushIcache::flush((uintptr_t)addr, ((uintptr_t)lines) << ICache::log2_line_size); + -+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2); ++ return magic; ++} + -+public: -+#ifdef COMPILER1 -+ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); -+ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { ++ // Only riscv_flush_icache is supported as I-cache synchronization. ++ // We must make sure the VM can execute such without error. ++ if (!RiscvFlushIcache::test()) { ++ vm_exit_during_initialization("Unable to synchronize I-cache"); ++ } + -+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); -+ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); -+#endif ++ address start = (address)icache_flush; ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; + -+ void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread); -+}; ++ // ICache::invalidate_range() contains explicit condition that the first ++ // call is invoked on the generated icache flush stub code range. 
++ ICache::invalidate_range(start, 0); + -+#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp ++ { ++ StubCodeMark mark(this, "ICache", "fake_stub_for_inlined_icache_flush"); ++ __ ret(); ++ } ++} ++ ++#undef __ +diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp new file mode 100644 -index 00000000000..8735fd014ff +index 0000000000..5bf40ca820 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp -@@ -0,0 +1,31 @@ ++++ b/src/hotspot/cpu/riscv/icache_riscv.hpp +@@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -15986,20 +15812,32 @@ index 00000000000..8735fd014ff + * + */ + -+#ifndef CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP -+#define CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP ++#ifndef CPU_RISCV_ICACHE_RISCV_HPP ++#define CPU_RISCV_ICACHE_RISCV_HPP + -+const size_t G1MergeHeapRootsPrefetchCacheSize = 16; ++// Interface for updating the instruction cache. Whenever the VM ++// modifies code, part of the processor instruction cache potentially ++// has to be flushed. + -+#endif // CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp ++class ICache : public AbstractICache { ++public: ++ enum { ++ stub_size = 16, // Size of the icache flush stub in bytes ++ line_size = BytesPerWord, // conservative ++ log2_line_size = LogBytesPerWord // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_RISCV_ICACHE_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp new file mode 100644 -index 00000000000..3c115a2ea02 +index 0000000000..b50be7e726 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp -@@ -0,0 +1,302 @@ ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -0,0 +1,1931 @@ +/* -+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
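// --------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the stub above encodes the
// RISC-V rule for freshly written code, namely order the stores with a data
// FENCE and then have every hart that may execute the code perform a FENCE.I
// (the patch delegates the cross-hart part to RiscvFlushIcache::flush). In
// ordinary user-space code the same effect is normally obtained with the
// compiler builtin below, which on linux-riscv ends up in the flush-icache
// system call.
#include <cstdint>
#include <cstring>

// Publish freshly written machine code at [buf, buf + len) for execution.
void publish_code(void* buf, const void* src, std::size_t len) {
  std::memcpy(buf, src, len);  // plain data stores of the new instructions
  // Make the stores visible to instruction fetch on all harts.
  __builtin___clear_cache(static_cast<char*>(buf),
                          static_cast<char*>(buf) + len);
}

int main() {
  static unsigned char slab[16];
  const std::uint32_t nop = 0x00000013;   // RISC-V "addi x0, x0, 0"
  publish_code(slab, &nop, sizeof(nop));  // written but never executed here
  return 0;
}
// --------------------------------------------------------------------------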
+ * @@ -16024,2066 +15862,1920 @@ index 00000000000..3c115a2ea02 + */ + +#include "precompiled.hpp" -+#include "classfile/classLoaderData.hpp" ++#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" -+#include "gc/shared/barrierSetNMethod.hpp" -+#include "gc/shared/collectedHeap.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "memory/universe.hpp" -+#include "runtime/jniHandles.hpp" ++#include "interp_masm_riscv.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "logging/log.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/method.hpp" ++#include "oops/methodData.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.hpp" ++#include "runtime/thread.inline.hpp" + -+#define __ masm-> ++void InterpreterMacroAssembler::narrow(Register result) { ++ // Get method->_constMethod->_result_type ++ ld(t0, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++ ld(t0, Address(t0, Method::const_offset())); ++ lbu(t0, Address(t0, ConstMethod::result_type_offset())); + -+void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread) { -+ assert_cond(masm != NULL); ++ Label done, notBool, notByte, notChar; + -+ // RA is live. It must be saved around calls. ++ // common case first ++ mv(t1, T_INT); ++ beq(t0, t1, done); + -+ bool in_heap = (decorators & IN_HEAP) != 0; -+ bool in_native = (decorators & IN_NATIVE) != 0; -+ bool is_not_null = (decorators & IS_NOT_NULL) != 0; -+ switch (type) { -+ case T_OBJECT: // fall through -+ case T_ARRAY: { -+ if (in_heap) { -+ if (UseCompressedOops) { -+ __ lwu(dst, src); -+ if (is_not_null) { -+ __ decode_heap_oop_not_null(dst); -+ } else { -+ __ decode_heap_oop(dst); -+ } -+ } else { -+ __ ld(dst, src); -+ } -+ } else { -+ assert(in_native, "why else?"); -+ __ ld(dst, src); -+ } -+ break; -+ } -+ case T_BOOLEAN: __ load_unsigned_byte (dst, src); break; -+ case T_BYTE: __ load_signed_byte (dst, src); break; -+ case T_CHAR: __ load_unsigned_short(dst, src); break; -+ case T_SHORT: __ load_signed_short (dst, src); break; -+ case T_INT: __ lw (dst, src); break; -+ case T_LONG: __ ld (dst, src); break; -+ case T_ADDRESS: __ ld (dst, src); break; -+ case T_FLOAT: __ flw (f10, src); break; -+ case T_DOUBLE: __ fld (f10, src); break; -+ default: Unimplemented(); ++ // mask integer result to narrower return type. 
++ mv(t1, T_BOOLEAN); ++ bne(t0, t1, notBool); ++ ++ andi(result, result, 0x1); ++ j(done); ++ ++ bind(notBool); ++ mv(t1, T_BYTE); ++ bne(t0, t1, notByte); ++ sign_extend(result, result, 8); ++ j(done); ++ ++ bind(notByte); ++ mv(t1, T_CHAR); ++ bne(t0, t1, notChar); ++ zero_extend(result, result, 16); ++ j(done); ++ ++ bind(notChar); ++ sign_extend(result, result, 16); ++ ++ // Nothing to do for T_INT ++ bind(done); ++ addw(result, result, zr); ++} ++ ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry != NULL, "Entry must have been generated by now"); ++ j(entry); ++} ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, ++ // it means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. ++ lwu(t1, Address(xthread, JavaThread::popframe_condition_offset())); ++ andi(t0, t1, JavaThread::popframe_pending_bit); ++ beqz(t0, L); ++ andi(t0, t1, JavaThread::popframe_processing_bit); ++ bnez(t0, L); ++ // Call Interpreter::remove_activation_preserving_args_entry() to get the ++ // address of the same-named entrypoint in the generated interpreter code. ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(x10); ++ bind(L); + } +} + -+void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2) { -+ assert_cond(masm != NULL); -+ bool in_heap = (decorators & IN_HEAP) != 0; -+ bool in_native = (decorators & IN_NATIVE) != 0; -+ switch (type) { -+ case T_OBJECT: // fall through -+ case T_ARRAY: { -+ val = val == noreg ? 
zr : val; -+ if (in_heap) { -+ if (UseCompressedOops) { -+ assert(!dst.uses(val), "not enough registers"); -+ if (val != zr) { -+ __ encode_heap_oop(val); -+ } -+ __ sw(val, dst); -+ } else { -+ __ sd(val, dst); -+ } -+ } else { -+ assert(in_native, "why else?"); -+ __ sd(val, dst); -+ } ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++ ld(x12, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr(x12, JvmtiThreadState::earlyret_tos_offset()); ++ const Address oop_addr(x12, JvmtiThreadState::earlyret_oop_offset()); ++ const Address val_addr(x12, JvmtiThreadState::earlyret_value_offset()); ++ switch (state) { ++ case atos: ++ ld(x10, oop_addr); ++ sd(zr, oop_addr); ++ verify_oop(x10); + break; -+ } -+ case T_BOOLEAN: -+ __ andi(val, val, 0x1); // boolean is true if LSB is 1 -+ __ sb(val, dst); ++ case ltos: ++ ld(x10, val_addr); + break; -+ case T_BYTE: __ sb(val, dst); break; -+ case T_CHAR: __ sh(val, dst); break; -+ case T_SHORT: __ sh(val, dst); break; -+ case T_INT: __ sw(val, dst); break; -+ case T_LONG: __ sd(val, dst); break; -+ case T_ADDRESS: __ sd(val, dst); break; -+ case T_FLOAT: __ fsw(f10, dst); break; -+ case T_DOUBLE: __ fsd(f10, dst); break; -+ default: Unimplemented(); ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ lwu(x10, val_addr); ++ break; ++ case ftos: ++ flw(f10, val_addr); ++ break; ++ case dtos: ++ fld(f10, val_addr); ++ break; ++ case vtos: ++ /* nothing to do */ ++ break; ++ default: ++ ShouldNotReachHere(); + } ++ // Clean up tos value in the thread object ++ mvw(t0, (int) ilgl); ++ sw(t0, tos_addr); ++ sw(zr, val_addr); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ beqz(t0, L); // if [thread->jvmti_thread_state() == NULL] then exit ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ lwu(t0, Address(t0, JvmtiThreadState::earlyret_state_offset())); ++ mv(t1, JvmtiThreadState::earlyret_pending); ++ bne(t0, t1, L); + ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. ++ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ lwu(t0, Address(t0, JvmtiThreadState::earlyret_tos_offset())); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), t0); ++ jr(x10); ++ bind(L); ++ } +} + -+void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath) { -+ assert_cond(masm != NULL); -+ // If mask changes we need to ensure that the inverse is still encodable as an immediate -+ STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); -+ __ andi(obj, obj, ~JNIHandles::weak_tag_mask); -+ __ ld(obj, Address(obj, 0)); // *obj ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ lhu(reg, Address(xbcp, bcp_offset)); ++ revb_h(reg, reg); +} + -+// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. 
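// --------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): narrow() earlier in this file
// clips the 64-bit result register to the method's declared return type:
// booleans keep only bit 0, bytes and shorts are sign-extended from 8/16 bits,
// chars are zero-extended from 16 bits, and every case is finally sign-extended
// from 32 bits by the trailing addw. A plain-C++ model of the same rules:
#include <cstdint>

enum class ResultType { Boolean, Byte, Char, Short, Int };

// Models the narrowing performed by InterpreterMacroAssembler::narrow().
std::int64_t narrow_result(ResultType type, std::int64_t raw) {
  switch (type) {
    case ResultType::Boolean: return raw & 0x1;                         // keep bit 0
    case ResultType::Byte:    return static_cast<std::int8_t>(raw);     // sign-extend 8
    case ResultType::Char:    return static_cast<std::uint16_t>(raw);   // zero-extend 16
    case ResultType::Short:   return static_cast<std::int16_t>(raw);    // sign-extend 16
    case ResultType::Int:     return static_cast<std::int32_t>(raw);    // sign-extend 32
  }
  return raw;
}

int main() {
  return narrow_result(ResultType::Char, 0x1FFFF) == 0xFFFF ? 0 : 1;
}
// --------------------------------------------------------------------------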
-+void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1, -+ Register tmp2, -+ Label& slow_case, -+ bool is_far) { -+ assert_cond(masm != NULL); -+ assert_different_registers(obj, tmp2); -+ assert_different_registers(obj, var_size_in_bytes); -+ Register end = tmp2; ++void InterpreterMacroAssembler::get_dispatch() { ++ int32_t offset = 0; ++ la_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset); ++ addi(xdispatch, xdispatch, offset); ++} + -+ __ ld(obj, Address(xthread, JavaThread::tlab_top_offset())); -+ if (var_size_in_bytes == noreg) { -+ __ la(end, Address(obj, con_size_in_bytes)); ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ load_unsigned_short(index, Address(xbcp, bcp_offset)); ++ } else if (index_size == sizeof(u4)) { ++ lwu(index, Address(xbcp, bcp_offset)); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ xori(index, index, -1); ++ addw(index, index, zr); ++ } else if (index_size == sizeof(u1)) { ++ load_unsigned_byte(index, Address(xbcp, bcp_offset)); + } else { -+ __ add(end, obj, var_size_in_bytes); ++ ShouldNotReachHere(); + } -+ __ ld(t0, Address(xthread, JavaThread::tlab_end_offset())); -+ __ bgtu(end, t0, slow_case, is_far); ++} + -+ // update the tlab top pointer -+ __ sd(end, Address(xthread, JavaThread::tlab_top_offset())); ++// Return ++// Rindex: index into constant pool ++// Rcache: address of cache entry - ConstantPoolCache::base_offset() ++// ++// A caller must add ConstantPoolCache::base_offset() to Rcache to get ++// the true address of the cache entry. ++// ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ assert_different_registers(cache, xcpool); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // Convert from field index to ConstantPoolCacheEntry ++ // riscv already has the cache in xcpool so there is no need to ++ // install it in cache. Instead we pre-add the indexed offset to ++ // xcpool and return it in cache. All clients of this method need to ++ // be modified accordingly. ++ shadd(cache, index, xcpool, cache, 5); ++} + -+ // recover var_size_in_bytes if necessary -+ if (var_size_in_bytes == end) { -+ __ sub(var_size_in_bytes, var_size_in_bytes, obj); -+ } ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. ++ // n.b. 
unlike x86 cache already includes the index offset ++ la(bytecode, Address(cache, ++ ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::indices_offset())); ++ membar(MacroAssembler::AnyAny); ++ lwu(bytecode, bytecode); ++ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte)); ++ srli(bytecode, bytecode, XLEN - BitsPerByte); +} + -+// Defines obj, preserves var_size_in_bytes -+void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1, -+ Label& slow_case, -+ bool is_far) { -+ assert_cond(masm != NULL); -+ assert_different_registers(obj, var_size_in_bytes, tmp1); -+ if (!Universe::heap()->supports_inline_contig_alloc()) { -+ __ j(slow_case); -+ } else { -+ Register end = tmp1; -+ Label retry; -+ __ bind(retry); ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // Convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, ++ "else change next line"); ++ ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); ++ // skip past the header ++ add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ // construct pointer to cache entry ++ shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord); ++} + -+ // Get the current end of the heap -+ ExternalAddress address_end((address) Universe::heap()->end_addr()); -+ { -+ int32_t offset; -+ __ la_patchable(t1, address_end, offset); -+ __ ld(t1, Address(t1, offset)); -+ } ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index, Register tmp) { ++ assert_different_registers(result, index); + -+ // Get the current top of the heap -+ ExternalAddress address_top((address) Universe::heap()->top_addr()); -+ { -+ int32_t offset; -+ __ la_patchable(t0, address_top, offset); -+ __ addi(t0, t0, offset); -+ __ lr_d(obj, t0, Assembler::aqrl); -+ } ++ get_constant_pool(result); ++ // Load pointer for resolved_references[] objArray ++ ld(result, Address(result, ConstantPool::cache_offset_in_bytes())); ++ ld(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes())); ++ resolve_oop_handle(result, tmp); ++ // Add in the index ++ addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); ++ shadd(result, index, result, index, LogBytesPerHeapOop); ++ load_heap_oop(result, Address(result, 0)); ++} + -+ // Adjust it my the size of our new object -+ if (var_size_in_bytes == noreg) { -+ __ la(end, Address(obj, con_size_in_bytes)); -+ } else { -+ __ add(end, obj, var_size_in_bytes); -+ } ++void InterpreterMacroAssembler::load_resolved_klass_at_offset( ++ Register cpool, Register index, Register klass, Register temp) { ++ shadd(temp, index, cpool, temp, LogBytesPerWord); ++ lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index ++ ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses 
++ shadd(klass, temp, klass, temp, LogBytesPerWord); ++ ld(klass, Address(klass, Array::base_offset_in_bytes())); ++} + -+ // if end < obj then we wrapped around high memory -+ __ bltu(end, obj, slow_case, is_far); ++// Generate a subtype check: branch to ok_is_subtype if sub_klass is a ++// subtype of super_klass. ++// ++// Args: ++// x10: superklass ++// Rsub_klass: subklass ++// ++// Kills: ++// x12, x15 ++void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, ++ Label& ok_is_subtype) { ++ assert(Rsub_klass != x10, "x10 holds superklass"); ++ assert(Rsub_klass != x12, "x12 holds 2ndary super array length"); ++ assert(Rsub_klass != x15, "x15 holds 2ndary super array scan ptr"); + -+ __ bgtu(end, t1, slow_case, is_far); ++ // Profile the not-null value's klass. ++ profile_typecheck(x12, Rsub_klass, x15); // blows x12, reloads x15 + -+ // If heap_top hasn't been changed by some other thread, update it. -+ __ sc_d(t1, end, t0, Assembler::rl); -+ __ bnez(t1, retry); ++ // Do the check. ++ check_klass_subtype(Rsub_klass, x10, x12, ok_is_subtype); // blows x12 + -+ incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1); -+ } ++ // Profile the failure of the check. ++ profile_typecheck_failed(x12); // blows x12 +} + -+void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1) { -+ assert_cond(masm != NULL); -+ assert(tmp1->is_valid(), "need temp reg"); ++// Java Expression Stack + -+ __ ld(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); -+ if (var_size_in_bytes->is_valid()) { -+ __ add(tmp1, tmp1, var_size_in_bytes); -+ } else { -+ __ add(tmp1, tmp1, con_size_in_bytes); -+ } -+ __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld(r, Address(esp, 0)); ++ addi(esp, esp, wordSize); +} + -+void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { -+ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); -+ -+ if (bs_nm == NULL) { -+ return; -+ } ++void InterpreterMacroAssembler::pop_i(Register r) { ++ lw(r, Address(esp, 0)); // lw do signed extended ++ addi(esp, esp, wordSize); ++} + -+ // RISCV atomic operations require that the memory address be naturally aligned. -+ __ align(4); ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld(r, Address(esp, 0)); ++ addi(esp, esp, 2 * Interpreter::stackElementSize); ++} + -+ Label skip, guard; -+ Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset())); ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ addi(esp, esp, -wordSize); ++ sd(r, Address(esp, 0)); ++} + -+ __ lwu(t0, guard); ++void InterpreterMacroAssembler::push_i(Register r) { ++ addi(esp, esp, -wordSize); ++ addw(r, r, zr); // signed extended ++ sd(r, Address(esp, 0)); ++} + -+ // Subsequent loads of oops must occur after load of guard value. -+ // BarrierSetNMethod::disarm sets guard with release semantics. 
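// --------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the push_/pop_ helpers above
// treat the interpreter's expression stack as word-sized slots growing toward
// lower addresses. Category-1 values (int, float, reference) take one slot;
// longs and doubles take two stack elements, and push_l zeroes the unused
// filler slot. A toy model of those slot moves, with the vector's back()
// playing the role of the stack top:
#include <cassert>
#include <cstdint>
#include <vector>

struct ExprStack {
  std::vector<std::int64_t> slots;

  void push_int(std::int32_t v)  { slots.push_back(v); }  // one slot
  void push_long(std::int64_t v) {                        // two slots
    slots.push_back(0);                                   // filler, kept zero
    slots.push_back(v);                                   // value on top
  }
  std::int32_t pop_int() {
    std::int32_t v = static_cast<std::int32_t>(slots.back());
    slots.pop_back();
    return v;
  }
  std::int64_t pop_long() {
    std::int64_t v = slots.back();
    slots.pop_back();
    slots.pop_back();                                      // drop the filler too
    return v;
  }
};

int main() {
  ExprStack s;
  s.push_int(-1);
  s.push_long(std::int64_t{1} << 40);
  assert(s.pop_long() == (std::int64_t{1} << 40));
  assert(s.pop_int() == -1);
  return 0;
}
// --------------------------------------------------------------------------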
-+ __ membar(MacroAssembler::LoadLoad); -+ __ lwu(t1, thread_disarmed_addr); -+ __ beq(t0, t1, skip); ++void InterpreterMacroAssembler::push_l(Register r) { ++ addi(esp, esp, -2 * wordSize); ++ sd(zr, Address(esp, wordSize)); ++ sd(r, Address(esp)); ++} + -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, StubRoutines::riscv::method_entry_barrier(), offset); -+ __ jalr(ra, t0, offset); -+ __ j(skip); ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ flw(r, esp, 0); ++ addi(esp, esp, wordSize); ++} + -+ __ bind(guard); ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ fld(r, esp, 0); ++ addi(esp, esp, 2 * Interpreter::stackElementSize); ++} + -+ assert(__ offset() % 4 == 0, "bad alignment"); -+ __ emit_int32(0); // nmethod guard value. Skipped over in common case. ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ addi(esp, esp, -wordSize); ++ fsw(r, Address(esp, 0)); ++} + -+ __ bind(skip); ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ addi(esp, esp, -2 * wordSize); ++ fsd(r, Address(esp, 0)); +} + -+void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { -+ BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod(); -+ if (bs == NULL) { -+ return; ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: ++ pop_ptr(); ++ verify_oop(x10); ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ pop_i(); ++ break; ++ case ltos: ++ pop_l(); ++ break; ++ case ftos: ++ pop_f(); ++ break; ++ case dtos: ++ pop_d(); ++ break; ++ case vtos: ++ /* nothing to do */ ++ break; ++ default: ++ ShouldNotReachHere(); + } ++} + -+ Label bad_call; -+ __ beqz(xmethod, bad_call); ++void InterpreterMacroAssembler::push(TosState state) { ++ switch (state) { ++ case atos: ++ verify_oop(x10); ++ push_ptr(); ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ push_i(); ++ break; ++ case ltos: ++ push_l(); ++ break; ++ case ftos: ++ push_f(); ++ break; ++ case dtos: ++ push_d(); ++ break; ++ case vtos: ++ /* nothing to do */ ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} + -+ // Pointer chase to the method holder to find out if the method is concurrently unloading. -+ Label method_live; -+ __ load_method_holder_cld(t0, xmethod); ++// Helpers for swap and dup ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); ++} + -+ // Is it a strong CLD? -+ __ lwu(t1, Address(t0, ClassLoaderData::keep_alive_offset())); -+ __ bnez(t1, method_live); ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ sd(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); ++} + -+ // Is it a weak but alive CLD? -+ __ push_reg(RegSet::of(x28, x29), sp); ++void InterpreterMacroAssembler::load_float(Address src) { ++ flw(f10, src); ++} + -+ __ ld(x28, Address(t0, ClassLoaderData::holder_offset())); ++void InterpreterMacroAssembler::load_double(Address src) { ++ fld(f10, src); ++} + -+ // Uses x28 & x29, so we must pass new temporaries. 
-+ __ resolve_weak_handle(x28, x29); -+ __ mv(t0, x28); ++void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { ++ // set sender sp ++ mv(x30, sp); ++ // record last_sp ++ sd(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++} + -+ __ pop_reg(RegSet::of(x28, x29), sp); ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method) { ++ prepare_to_jump_from_interpreted(); ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); ++ beqz(t0, run_compiled_code); ++ ld(t0, Address(method, Method::interpreter_entry_offset())); ++ jr(t0); ++ bind(run_compiled_code); ++ } + -+ __ bnez(t0, method_live); -+ -+ __ bind(bad_call); ++ ld(t0, Address(method, Method::from_interpreted_offset())); ++ jr(t0); ++} + -+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -+ __ bind(method_live); ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. amd64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { +} -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp -new file mode 100644 -index 00000000000..b85f7f5582b ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp -@@ -0,0 +1,79 @@ -+/* -+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ + -+#ifndef CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetNMethod.hpp" -+#include "memory/allocation.hpp" -+#include "oops/access.hpp" ++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll, ++ Register Rs) { ++ // Pay attention to the argument Rs, which is acquiesce in t0. ++ if (VerifyActivationFrameSize) { ++ Unimplemented(); ++ } ++ if (verifyoop && state == atos) { ++ verify_oop(x10); ++ } + -+class BarrierSetAssembler: public CHeapObj { -+private: -+ void incr_allocated_bytes(MacroAssembler* masm, -+ Register var_size_in_bytes, int con_size_in_bytes, -+ Register t1 = noreg); ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && ++ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; + -+public: -+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs) {} -+ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register start, Register end, Register tmp, RegSet saved_regs) {} -+ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread); -+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2); ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ andi(t1, t1, SafepointMechanism::poll_bit()); ++ bnez(t1, safepoint); ++ } ++ if (table == Interpreter::dispatch_table(state)) { ++ li(t1, Interpreter::distance_from_dispatch_table(state)); ++ add(t1, Rs, t1); ++ shadd(t1, t1, xdispatch, t1, 3); ++ } else { ++ mv(t1, (address)table); ++ shadd(t1, Rs, t1, Rs, 3); ++ } ++ ld(t1, Address(t1)); ++ jr(t1); + -+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath); ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ la(t1, ExternalAddress((address)safepoint_table)); ++ shadd(t1, Rs, t1, Rs, 3); ++ ld(t1, Address(t1)); ++ jr(t1); ++ } ++} + -+ virtual void tlab_allocate(MacroAssembler* masm, -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Register tmp2, // temp register -+ Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false -+ ); ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll, Register Rs) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll, Rs); ++} + -+ void eden_allocate(MacroAssembler* masm, -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // 
object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false -+ ); -+ virtual void barrier_stubs_init() {} ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state, Register Rs) { ++ dispatch_base(state, Interpreter::normal_table(state), Rs); ++} + -+ virtual void nmethod_entry_barrier(MacroAssembler* masm); -+ virtual void c2i_entry_barrier(MacroAssembler* masm); -+ virtual ~BarrierSetAssembler() {} -+}; ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state, Register Rs) { ++ dispatch_base(state, Interpreter::normal_table(state), false, Rs); ++} + -+#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp -new file mode 100644 -index 00000000000..ae7ee4c5a44 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp -@@ -0,0 +1,171 @@ -+/* -+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode ++ load_unsigned_byte(t0, Address(xbcp, step)); ++ add(xbcp, xbcp, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} + -+#include "precompiled.hpp" -+#include "code/codeCache.hpp" -+#include "code/nativeInst.hpp" -+#include "gc/shared/barrierSetNMethod.hpp" -+#include "logging/log.hpp" -+#include "memory/resourceArea.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/registerMap.hpp" -+#include "runtime/thread.hpp" -+#include "utilities/align.hpp" -+#include "utilities/debug.hpp" ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ lbu(t0, Address(xbcp, 0)); ++ dispatch_base(state, table); ++} + -+class NativeNMethodBarrier: public NativeInstruction { -+ address instruction_address() const { return addr_at(0); } ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. 
++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers x13 may be in use for the ++ // result check if synchronized method ++ Label unlocked, unlock, no_unlock; + -+ int *guard_addr() { -+ /* auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j */ -+ return reinterpret_cast(instruction_address() + 12 * 4); -+ } ++ // get the value of _do_not_unlock_if_synchronized into x13 ++ const Address do_not_unlock_if_synchronized(xthread, ++ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ lbu(x13, do_not_unlock_if_synchronized); ++ sb(zr, do_not_unlock_if_synchronized); // reset the flag + -+public: -+ int get_value() { -+ return Atomic::load_acquire(guard_addr()); -+ } ++ // get method access flags ++ ld(x11, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++ ld(x12, Address(x11, Method::access_flags_offset())); ++ andi(t0, x12, JVM_ACC_SYNCHRONIZED); ++ beqz(t0, unlocked); + -+ void set_value(int value) { -+ Atomic::release_store(guard_addr(), value); -+ } ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag ++ // is set. ++ bnez(x13, no_unlock); + -+ void verify() const; -+}; ++ // unlock monitor ++ push(state); // save result + -+// Store the instruction bitmask, bits and name for checking the barrier. -+struct CheckInsn { -+ uint32_t mask; -+ uint32_t bits; -+ const char *name; -+}; ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ const Address monitor(fp, frame::interpreter_frame_initial_sp_offset * ++ wordSize - (int) sizeof(BasicObjectLock)); ++ // We use c_rarg1 so that if we go slow path it will be the correct ++ // register for unlock_object to pass to VM directly ++ la(c_rarg1, monitor); // address of first monitor + -+static const struct CheckInsn barrierInsn[] = { -+ { 0x00000fff, 0x00000297, "auipc t0, 0 "}, -+ { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) "}, -+ { 0xffffffff, 0x0aa0000f, "fence ir, ir "}, -+ { 0x000fffff, 0x000be303, "lwu t1, 112(xthread)"}, -+ { 0x01fff07f, 0x00628063, "beq t0, t1, skip "}, -+ { 0x00000fff, 0x000002b7, "lui t0, imm0 "}, -+ { 0x000fffff, 0x00028293, "addi t0, t0, imm1 "}, -+ { 0xffffffff, 0x00b29293, "slli t0, t0, 11 "}, -+ { 0x000fffff, 0x00028293, "addi t0, t0, imm2 "}, -+ { 0xffffffff, 0x00529293, "slli t0, t0, 5 "}, -+ { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) "}, -+ { 0x00000fff, 0x0000006f, "j skip "} -+ /* guard: */ -+ /* 32bit nmethod guard value */ -+ /* skip: */ -+}; ++ ld(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); ++ bnez(x10, unlock); + -+// The encodings must match the instructions emitted by -+// BarrierSetAssembler::nmethod_entry_barrier. The matching ignores the specific -+// register numbers and immediate values in the encoding. 
-+void NativeNMethodBarrier::verify() const { -+ intptr_t addr = (intptr_t) instruction_address(); -+ for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) { -+ uint32_t inst = *((uint32_t*) addr); -+ if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) { -+ tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst); -+ fatal("not an %s instruction.", barrierInsn[i].name); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. ++ if (install_monitor_exception) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); + } -+ addr += 4; ++ j(unlocked); + } -+} -+ + -+/* We're called from an nmethod when we need to deoptimize it. We do -+ this by throwing away the nmethod's frame and jumping to the -+ ic_miss stub. This looks like there has been an IC miss at the -+ entry of the nmethod, so we resolve the call, which will fall back -+ to the interpreter if the nmethod has been unloaded. */ -+void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { ++ bind(unlock); ++ unlock_object(c_rarg1); ++ pop(state); + -+ typedef struct { -+ intptr_t *sp; intptr_t *fp; address ra; address pc; -+ } frame_pointers_t; ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); + -+ frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5); ++ // x10: Might contain return value + -+ JavaThread *thread = JavaThread::current(); -+ RegisterMap reg_map(thread, false); -+ frame frame = thread->last_frame(); ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top( ++ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const Address monitor_block_bot( ++ fp, frame::interpreter_frame_initial_sp_offset * wordSize); + -+ assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be"); -+ assert(frame.cb() == nm, "must be"); -+ frame = frame.sender(®_map); ++ bind(restart); ++ // We use c_rarg1 so that if we go slow path it will be the correct ++ // register for unlock_object to pass to VM directly ++ ld(c_rarg1, monitor_block_top); // points to current entry, starting ++ // with top-most entry ++ la(x9, monitor_block_bot); // points to word before bottom of ++ // monitor block + -+ LogTarget(Trace, nmethod, barrier) out; -+ if (out.is_enabled()) { -+ ResourceMark mark; -+ log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p", -+ nm->method()->name_and_sig_as_C_string(), -+ nm, *(address *) return_address_ptr, nm->is_osr_method(), thread, -+ thread->name(), frame.sp(), nm->verified_entry_point()); -+ } ++ j(entry); + -+ new_frame->sp = frame.sp(); -+ new_frame->fp = frame.fp(); -+ new_frame->ra = frame.pc(); -+ new_frame->pc = SharedRuntime::get_handle_wrong_method_stub(); -+} ++ // Entry already locked, need to throw exception ++ bind(exception); + -+// This is the offset of the entry barrier from where the frame is completed. 
-+// If any code changes between the end of the verified entry where the entry -+// barrier resides, and the completion of the frame, then -+// NativeNMethodCmpBarrier::verify() will immediately complain when it does -+// not find the expected native instruction at this offset, which needs updating. -+// Note that this offset is invariant of PreserveFramePointer. ++ if (throw_monitor_exception) { ++ // Throw exception ++ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime:: ++ throw_illegal_monitor_state_exception)); + -+// see BarrierSetAssembler::nmethod_entry_barrier -+// auipc + lwu + fence + lwu + beq + movptr_with_offset(5 instructions) + jalr + j + int32 -+static const int entry_barrier_offset = -4 * 13; ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception. ++ // Unlock does not block, so don't have to worry about the frame. ++ // We don't have to preserve c_rarg1 since we are going to throw an exception. + -+static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) { -+ address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset; -+ NativeNMethodBarrier* barrier = reinterpret_cast(barrier_address); -+ debug_only(barrier->verify()); -+ return barrier; -+} ++ push(state); ++ unlock_object(c_rarg1); ++ pop(state); + -+void BarrierSetNMethod::disarm(nmethod* nm) { -+ if (!supports_entry_barrier(nm)) { -+ return; -+ } ++ if (install_monitor_exception) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ new_illegal_monitor_state_exception)); ++ } + -+ // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier. -+ NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); ++ j(restart); ++ } + -+ barrier->set_value(disarmed_value()); -+} ++ bind(loop); ++ // check if current entry is used ++ add(t0, c_rarg1, BasicObjectLock::obj_offset_in_bytes()); ++ ld(t0, Address(t0, 0)); ++ bnez(t0, exception); + -+bool BarrierSetNMethod::is_armed(nmethod* nm) { -+ if (!supports_entry_barrier(nm)) { -+ return false; ++ add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg1, x9, loop); // check if bottom reached if not at bottom then check this entry + } + -+ NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); -+ return barrier->get_value() != disarmed_value(); -+} -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -new file mode 100644 -index 00000000000..a419f92b5f6 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,111 @@ -+/* -+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). 
-+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/cardTable.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" -+#include "gc/shared/cardTableBarrierSetAssembler.hpp" -+#include "gc/shared/gc_globals.hpp" -+#include "interpreter/interp_masm.hpp" -+ -+#define __ masm-> ++ bind(no_unlock); + ++ // jvmti support ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA + -+void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) { -+ assert_cond(masm != NULL); -+ assert_different_registers(obj, tmp); -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } + -+ __ srli(obj, obj, CardTable::card_shift()); ++ // remove activation ++ // get sender esp ++ ld(t1, ++ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; + -+ assert(CardTable::dirty_card_val() == 0, "must be"); ++ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); ++ ble(t1, t0, no_reserved_zone_enabling); + -+ __ load_byte_map_base(tmp); -+ __ add(tmp, obj, tmp); ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), xthread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); + -+ if (UseCondCardMark) { -+ Label L_already_dirty; -+ __ membar(MacroAssembler::StoreLoad); -+ __ lbu(t1, Address(tmp)); -+ __ beqz(t1, L_already_dirty); -+ __ sb(zr, Address(tmp)); -+ __ bind(L_already_dirty); -+ } else { -+ __ sb(zr, Address(tmp)); ++ bind(no_reserved_zone_enabling); + } -+} + -+void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs) { -+ assert_cond(masm != NULL); -+ assert_different_registers(start, tmp); -+ assert_different_registers(count, tmp); ++ // restore sender esp ++ mv(esp, t1); + -+ Label L_loop, L_done; -+ const Register end = count; ++ // remove frame anchor ++ leave(); ++ // If we're returning to interpreted code we will shortly be ++ // adjusting SP to allow some space for ESP. If we're returning to ++ // compiled code the saved sender SP was saved in sender_sp, so this ++ // restores it. ++ andi(sp, esp, -16); ++} + -+ __ beqz(count, L_done); // zero count - nothing to do -+ // end = start + count << LogBytesPerHeapOop -+ __ shadd(end, count, start, count, LogBytesPerHeapOop); -+ __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive ++// Lock object ++// ++// Args: ++// c_rarg1: BasicObjectLock to be used for locking ++// ++// Kills: ++// x10 ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. 
(param regs) ++// t0, t1 (temp regs) ++void InterpreterMacroAssembler::lock_object(Register lock_reg) ++{ ++ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1"); ++ if (UseHeavyMonitors) { ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), ++ lock_reg); ++ } else { ++ Label done; + -+ __ srli(start, start, CardTable::card_shift()); -+ __ srli(end, end, CardTable::card_shift()); -+ __ sub(count, end, start); // number of bytes to copy ++ const Register swap_reg = x10; ++ const Register tmp = c_rarg2; ++ const Register obj_reg = c_rarg3; // Will contain the oop + -+ __ load_byte_map_base(tmp); -+ __ add(start, start, tmp); ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + ++ BasicLock::displaced_header_offset_in_bytes(); + -+ __ bind(L_loop); -+ __ add(tmp, start, count); -+ __ sb(zr, Address(tmp)); -+ __ sub(count, count, 1); -+ __ bgez(count, L_loop); -+ __ bind(L_done); -+} ++ Label slow_case; + -+void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2) { -+ bool in_heap = (decorators & IN_HEAP) != 0; -+ bool is_array = (decorators & IS_ARRAY) != 0; -+ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; -+ bool precise = is_array || on_anonymous; ++ // Load object pointer into obj_reg c_rarg3 ++ ld(obj_reg, Address(lock_reg, obj_offset)); + -+ bool needs_post_barrier = val != noreg && in_heap; -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); -+ if (needs_post_barrier) { -+ // flatten object address if needed -+ if (!precise || dst.offset() == 0) { -+ store_check(masm, dst.base(), x13); -+ } else { -+ assert_cond(masm != NULL); -+ __ la(x13, dst); -+ store_check(masm, x13, t0); ++ if (UseBiasedLocking) { ++ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); + } -+ } -+} -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp -new file mode 100644 -index 00000000000..686fe8fa478 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,42 @@ -+/* -+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP ++ // Load (object->mark() | 1) into swap_reg ++ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ ori(swap_reg, t0, 1); + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ sd(swap_reg, Address(lock_reg, mark_offset)); + -+class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { -+protected: -+ void store_check(MacroAssembler* masm, Register obj, Register tmp); ++ assert(lock_offset == 0, ++ "displached header must be first word in BasicObjectLock"); + -+ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs); -+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2); -+}; ++ if (PrintBiasedLockingStatistics) { ++ Label fail, fast; ++ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail); ++ bind(fast); ++ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), ++ t1, t0); ++ j(done); ++ bind(fail); ++ } else { ++ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++ } + -+#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp -new file mode 100644 -index 00000000000..7aa2015f9ec ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 7) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (7 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 3 bits clear. 
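[Editorial aside, not part of the patch.] A simplified stand-alone version of the recursion test described above, with hypothetical names: assuming the page size is a power of two and both sp and the page size have their low 3 bits clear, ((mark - sp) & (7 - page_size)) is zero exactly when mark is 8-byte aligned and lies in [sp, sp + page_size), i.e. when the displaced mark is a pointer into the current stack page and the lock is a recursive stack lock.

  #include <cstdint>

  // Hypothetical helper: true if 'mark' looks like a pointer into the current
  // thread's stack page, meaning this is a recursive stack lock.
  static bool is_recursive_stack_lock(uintptr_t mark, uintptr_t sp, uintptr_t page_size) {
    // Unsigned arithmetic: if mark < sp, the subtraction wraps and the high
    // bits of the result are set, so the masked test fails as intended.
    return ((mark - sp) & (7 - page_size)) == 0;
  }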
++ // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg ++ sub(swap_reg, swap_reg, sp); ++ li(t0, (int64_t)(7 - os::vm_page_size())); ++ andr(swap_reg, swap_reg, t0); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ // Save the test result, for recursive case, the result is zero ++ sd(swap_reg, Address(lock_reg, mark_offset)); + -+#define __ masm-> ++ if (PrintBiasedLockingStatistics) { ++ bnez(swap_reg, slow_case); ++ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), ++ t1, t0); ++ } ++ beqz(swap_reg, done); + -+void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs) { ++ bind(slow_case); + -+ if (is_oop) { -+ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); -+ } -+} ++ // Call the runtime routine for slow case ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), ++ lock_reg); + -+void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register start, Register count, Register tmp, -+ RegSet saved_regs) { -+ if (is_oop) { -+ gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp, saved_regs); ++ bind(done); + } +} + -+void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2) { -+ if (is_reference_type(type)) { -+ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg1: BasicObjectLock for lock ++// ++// Kills: ++// x10 ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) ++// t0, t1 (temp regs) ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) ++{ ++ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); + } else { -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); -+ } -+} -diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp -new file mode 100644 -index 00000000000..00419c3163c ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). 
-+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ Label done; + -+#ifndef CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP ++ const Register swap_reg = x10; ++ const Register header_reg = c_rarg2; // Will contain the old oopMark ++ const Register obj_reg = c_rarg3; // Will contain the oop + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" ++ save_bcp(); // Save in case of exception + -+// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other -+// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected -+// accesses, which are overridden in the concrete BarrierSetAssembler. ++ // Convert from BasicObjectLock structure to object and BasicLock ++ // structure Store the BasicLock address into x10 ++ la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + -+class ModRefBarrierSetAssembler: public BarrierSetAssembler { -+protected: -+ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register addr, Register count, RegSet saved_regs) {} -+ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs) {} ++ // Load oop into obj_reg(c_rarg3) ++ ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + -+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2) = 0; ++ // Free entry ++ sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + -+public: -+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs); -+ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register start, Register count, Register tmp, RegSet saved_regs); -+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2); -+}; ++ if (UseBiasedLocking) { ++ biased_locking_exit(obj_reg, header_reg, done); ++ } + -+#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -new file mode 100644 -index 00000000000..cd568cc723f ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -@@ -0,0 +1,117 @@ -+/* -+ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // Load the old header from BasicLock structure ++ ld(header_reg, Address(swap_reg, ++ BasicLock::displaced_header_offset_in_bytes())); + -+#include "precompiled.hpp" -+#include "c1/c1_LIRAssembler.hpp" -+#include "c1/c1_MacroAssembler.hpp" -+#include "gc/shared/gc_globals.hpp" -+#include "gc/shenandoah/shenandoahBarrierSet.hpp" -+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -+#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" ++ // Test for recursion ++ beqz(header_reg, done); + -+#define __ masm->masm()-> ++ // Atomic swap back the old header ++ cmpxchg_obj_header(swap_reg, header_reg, obj_reg, t0, done, /*fallthrough*/NULL); + -+void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { -+ Register addr = _addr->as_register_lo(); -+ Register newval = _new_value->as_register(); -+ Register cmpval = _cmp_value->as_register(); -+ Register tmp1 = _tmp1->as_register(); -+ Register tmp2 = _tmp2->as_register(); -+ Register result = result_opr()->as_register(); ++ // Call the runtime routine for slow case. ++ sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); + -+ ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), newval, t1); ++ bind(done); + -+ if (UseCompressedOops) { -+ __ encode_heap_oop(tmp1, cmpval); -+ cmpval = tmp1; -+ __ encode_heap_oop(tmp2, newval); -+ newval = tmp2; ++ restore_bcp(); + } -+ -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq, -+ /* release */ Assembler::rl, /* is_cae */ false, result); +} + -+#undef __ -+ -+#ifdef ASSERT -+#define __ gen->lir(__FILE__, __LINE__)-> -+#else -+#define __ gen->lir()-> -+#endif + -+LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) { -+ BasicType bt = access.type(); -+ if (access.is_oop()) { -+ LIRGenerator *gen = access.gen(); -+ if (ShenandoahSATBBarrier) { -+ pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(), -+ LIR_OprFact::illegalOpr /* pre_val */); -+ } -+ if (ShenandoahCASBarrier) { -+ cmp_value.load_item(); -+ new_value.load_item(); ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ beqz(mdp, zero_continue); ++} + -+ LIR_Opr tmp1 = gen->new_register(T_OBJECT); -+ LIR_Opr tmp2 = gen->new_register(T_OBJECT); -+ LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base(); -+ LIR_Opr result = gen->new_register(T_INT); ++// Set the method data pointer for the current bcp. 
++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ push_reg(0xc00, sp); // save x10, x11 + -+ __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), tmp1, tmp2, result)); -+ return result; -+ } -+ } -+ return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value); ++ // Test MDO to avoid the call if it is NULL. ++ ld(x10, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ beqz(x10, set_mdp); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), xmethod, xbcp); ++ // x10: mdi ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ la(x11, Address(x11, in_bytes(MethodData::data_offset()))); ++ add(x10, x11, x10); ++ sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ bind(set_mdp); ++ pop_reg(0xc00, sp); +} + -+LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) { -+ LIRGenerator* gen = access.gen(); -+ BasicType type = access.type(); ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ add(sp, sp, -4 * wordSize); ++ sd(x10, Address(sp, 0)); ++ sd(x11, Address(sp, wordSize)); ++ sd(x12, Address(sp, 2 * wordSize)); ++ sd(x13, Address(sp, 3 * wordSize)); ++ test_method_data_pointer(x13, verify_continue); // If mdp is zero, continue ++ get_method(x11); + -+ LIR_Opr result = gen->new_register(type); -+ value.load_item(); -+ LIR_Opr value_opr = value.result(); ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
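[Editorial aside, not part of the patch.] The loads that follow implement the consistency check stated in the comment above: the profile entry records the bci it was created for, so rebuilding "bytecode base + recorded bci" must land on the current bcp. A simplified sketch with hypothetical stand-ins for DataLayout and ConstMethod:

  #include <cstdint>

  struct ProfileRecord { uint16_t bci; };   // stand-in for the DataLayout header

  // Hypothetical check: method bytecode base plus the recorded bci must equal
  // the interpreter's current bytecode pointer.
  static bool mdp_consistent_with_bcp(const ProfileRecord* mdp,
                                      const uint8_t* codes_base,
                                      const uint8_t* bcp) {
    return codes_base + mdp->bci == bcp;
  }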
++ lh(x12, Address(x13, in_bytes(DataLayout::bci_offset()))); ++ ld(t0, Address(x11, Method::const_offset())); ++ add(x12, x12, t0); ++ la(x12, Address(x12, ConstMethod::codes_offset())); ++ beq(x12, xbcp, verify_continue); ++ // x10: method ++ // xbcp: bcp // xbcp == 22 ++ // x13: mdp ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), ++ x11, xbcp, x13); ++ bind(verify_continue); ++ ld(x10, Address(sp, 0)); ++ ld(x11, Address(sp, wordSize)); ++ ld(x12, Address(sp, 2 * wordSize)); ++ ld(x13, Address(sp, 3 * wordSize)); ++ add(sp, sp, 4 * wordSize); ++#endif // ASSERT ++} + -+ if (access.is_oop()) { -+ value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators()); -+ } + -+ assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type"); -+ LIR_Opr tmp = gen->new_register(T_INT); -+ __ xchg(access.resolved_addr(), value_opr, result, tmp); ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ sd(value, data); ++} + -+ if (access.is_oop()) { -+ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators()); -+ LIR_Opr tmp_opr = gen->new_register(type); -+ __ move(result, tmp_opr); -+ result = tmp_opr; -+ if (ShenandoahSATBBarrier) { -+ pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr, -+ result /* pre_val */); -+ } -+ } + -+ return result; ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ increment_mdp_data_at(mdp_in, noreg, constant, decrement); +} -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -new file mode 100644 -index 00000000000..d0ac6e52436 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,712 @@ -+/* -+ * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "gc/shenandoah/shenandoahBarrierSet.hpp" -+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -+#include "gc/shenandoah/shenandoahForwarding.hpp" -+#include "gc/shenandoah/shenandoahHeap.inline.hpp" -+#include "gc/shenandoah/shenandoahHeapRegion.hpp" -+#include "gc/shenandoah/shenandoahRuntime.hpp" -+#include "gc/shenandoah/shenandoahThreadLocalData.hpp" -+#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/thread.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_LIRAssembler.hpp" -+#include "c1/c1_MacroAssembler.hpp" -+#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" -+#endif + -+#define __ masm-> ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow + -+void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs) { -+ if (is_oop) { -+ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; -+ if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { ++ assert_different_registers(t1, t0, mdp_in, reg); + -+ Label done; ++ Address addr1(mdp_in, constant); ++ Address addr2(t1, 0); ++ Address &addr = addr1; ++ if (reg != noreg) { ++ la(t1, addr1); ++ add(t1, t1, reg); ++ addr = addr2; ++ } + -+ // Avoid calling runtime if count == 0 -+ __ beqz(count, done); ++ if (decrement) { ++ ld(t0, addr); ++ addi(t0, t0, -DataLayout::counter_increment); ++ Label L; ++ bltz(t0, L); // skip store if counter underflow ++ sd(t0, addr); ++ bind(L); ++ } else { ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ ld(t0, addr); ++ addi(t0, t0, DataLayout::counter_increment); ++ Label L; ++ blez(t0, L); // skip store if counter overflow ++ sd(t0, addr); ++ bind(L); ++ } ++} + -+ // Is GC active? 
-+ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); -+ assert_different_registers(src, dst, count, t0); ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int flags_offset = in_bytes(DataLayout::flags_offset()); ++ // Set the flag ++ lbu(t1, Address(mdp_in, flags_offset)); ++ ori(t1, t1, flag_byte_constant); ++ sb(t1, Address(mdp_in, flags_offset)); ++} + -+ __ lbu(t0, gc_state); -+ if (ShenandoahSATBBarrier && dest_uninitialized) { -+ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED); -+ __ beqz(t0, done); -+ } else { -+ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING); -+ __ beqz(t0, done); -+ } + -+ __ push_reg(saved_regs, sp); -+ if (UseCompressedOops) { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), -+ src, dst, count); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count); -+ } -+ __ pop_reg(saved_regs, sp); -+ __ bind(done); -+ } ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld(t1, Address(mdp_in, offset)); ++ bne(value, t1, not_equal_continue); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); + } +} + -+void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call) { -+ if (ShenandoahSATBBarrier) { -+ satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); -+ } ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(t1, Address(mdp_in, offset_of_disp)); ++ add(mdp_in, mdp_in, t1); ++ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); +} + -+void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call) { -+ // If expand_call is true then we expand the call_VM_leaf macro -+ // directly to skip generating the check by -+ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 
-+ assert(thread == xthread, "must be"); ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ add(t1, mdp_in, reg); ++ ld(t1, Address(t1, offset_of_disp)); ++ add(mdp_in, mdp_in, t1); ++ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++} + -+ Label done; -+ Label runtime; + -+ assert_different_registers(obj, pre_val, tmp, t0); -+ assert(pre_val != noreg && tmp != noreg, "expecting a register"); ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ addi(mdp_in, mdp_in, (unsigned)constant); ++ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++} + -+ Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); -+ Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + -+ // Is marking active? -+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { -+ __ lwu(tmp, in_progress); -+ } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(tmp, in_progress); -+ } -+ __ beqz(tmp, done); ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); + -+ // Do we need to load the previous value? -+ if (obj != noreg) { -+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); -+ } ++ // save/restore across call_VM ++ addi(sp, sp, -2 * wordSize); ++ sd(zr, Address(sp, 0)); ++ sd(return_bci, Address(sp, wordSize)); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ ld(zr, Address(sp, 0)); ++ ld(return_bci, Address(sp, wordSize)); ++ addi(sp, sp, 2 * wordSize); ++} + -+ // Is the previous value null? -+ __ beqz(pre_val, done); ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ // Can we store original value in the thread's buffer? -+ // Is index == 0? -+ // (The index field is typed as size_t.) -+ __ ld(tmp, index); // tmp := *index_adr -+ __ beqz(tmp, runtime); // tmp == 0? If yes, goto runtime ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); + -+ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize -+ __ sd(tmp, index); // *index_adr := tmp -+ __ ld(t0, buffer); -+ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr ++ // We are taking a branch. Increment the taken count. ++ Address data(mdp, in_bytes(JumpData::taken_offset())); ++ ld(bumped_count, data); ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ addi(bumped_count, bumped_count, DataLayout::counter_increment); ++ Label L; ++ // eg: bumped_count=0x7fff ffff ffff ffff + 1 < 0. so we use <= 0; ++ blez(bumped_count, L); // skip store if counter overflow, ++ sd(bumped_count, data); ++ bind(L); ++ // The method data pointer needs to be updated to reflect the new target. 
++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} + -+ // Record the previous value -+ __ sd(pre_val, Address(tmp, 0)); -+ __ j(done); ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ __ bind(runtime); -+ // save the live input values -+ RegSet saved = RegSet::of(pre_val); -+ if (tosca_live) saved += RegSet::of(x10); -+ if (obj != noreg) saved += RegSet::of(obj); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ __ push_reg(saved, sp); ++ // We are taking a branch. Increment the not taken count. ++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); + -+ // Calling the runtime using the regular call_VM_leaf mechanism generates -+ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) -+ // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL. -+ // -+ // If we care generating the pre-barrier without a frame (e.g. in the -+ // intrinsified Reference.get() routine) then ebp might be pointing to -+ // the caller frame and so this check will most likely fail at runtime. -+ // -+ // Expanding the call directly bypasses the generation of the check. -+ // So when we do not have have a full interpreter frame on the stack -+ // expand_call should be passed true. -+ if (expand_call) { -+ assert(pre_val != c_rarg1, "smashed arg"); -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); + } ++} + -+ __ pop_reg(saved, sp); ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ __ bind(done); -+} ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) { -+ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + -+ Label is_null; -+ __ beqz(dst, is_null); -+ resolve_forward_pointer_not_null(masm, dst, tmp); -+ __ bind(is_null); ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } +} + -+// IMPORTANT: This must preserve all registers, even t0 and t1, except those explicitely -+// passed in. -+void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) { -+ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); -+ // The below loads the mark word, checks if the lowest two bits are -+ // set, and if so, clear the lowest two bits and copy the result -+ // to dst. Otherwise it leaves dst alone. -+ // Implementing this is surprisingly awkward. 
I do it here by: -+ // - Inverting the mark word -+ // - Test lowest two bits == 0 -+ // - If so, set the lowest two bits -+ // - Invert the result back, and copy to dst -+ RegSet saved_regs = RegSet::of(t2); -+ bool borrow_reg = (tmp == noreg); -+ if (borrow_reg) { -+ // No free registers available. Make one useful. -+ tmp = t0; -+ if (tmp == dst) { -+ tmp = t1; -+ } -+ saved_regs += RegSet::of(tmp); -+ } ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ assert_different_registers(tmp, dst, t2); -+ __ push_reg(saved_regs, sp); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ Label done; -+ __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); -+ __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 -+ __ andi(t2, tmp, markWord::lock_mask_in_place); -+ __ bnez(t2, done); -+ __ ori(tmp, tmp, markWord::marked_value); -+ __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 -+ __ bind(done); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + -+ __ pop_reg(saved_regs, sp); ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } +} + -+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, -+ Register dst, -+ Address load_addr, -+ DecoratorSet decorators) { -+ assert(ShenandoahLoadRefBarrier, "Should be enabled"); -+ assert(dst != t1 && load_addr.base() != t1, "need t1"); -+ assert_different_registers(load_addr.base(), t0, t1); -+ -+ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); -+ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); -+ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); -+ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); -+ bool is_narrow = UseCompressedOops && !is_native; + -+ Label heap_stable, not_cset; -+ __ enter(); -+ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); -+ __ lbu(t1, gc_state); ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ // Check for heap stability -+ if (is_strong) { -+ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); -+ __ beqz(t1, heap_stable); -+ } else { -+ Label lrb; -+ __ andi(t0, t1, ShenandoahHeap::WEAK_ROOTS); -+ __ bnez(t0, lrb); -+ __ andi(t0, t1, ShenandoahHeap::HAS_FORWARDED); -+ __ beqz(t0, heap_stable); -+ __ bind(lrb); -+ } ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ // use x11 for load address -+ Register result_dst = dst; -+ if (dst == x11) { -+ __ mv(t1, dst); -+ dst = t1; -+ } ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ // We are making a call. Increment the count for null receiver. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ j(skip_receiver_profile); ++ bind(not_null); ++ } + -+ // Save x10 and x11, unless it is an output register -+ RegSet saved_regs = RegSet::of(x10, x11) - result_dst; -+ __ push_reg(saved_regs, sp); -+ __ la(x11, load_addr); -+ __ mv(x10, dst); ++ // Record the receiver type. 
++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); + -+ // Test for in-cset -+ if (is_strong) { -+ __ li(t1, (uint64_t)ShenandoahHeap::in_cset_fast_test_addr()); -+ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -+ __ add(t1, t1, t0); -+ __ lbu(t1, Address(t1)); -+ __ andi(t0, t1, 1); -+ __ beqz(t0, not_cset); -+ } ++ // The method data pointer needs to be updated to reflect the new target. + -+ __ push_call_clobbered_registers(); -+ if (is_strong) { -+ if (is_narrow) { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); -+ } else { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); -+ } -+ } else if (is_weak) { -+ if (is_narrow) { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); -+ } else { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); -+ } -+ } else { -+ assert(is_phantom, "only remaining strength"); -+ assert(!is_narrow, "phantom access cannot be narrow"); -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); + } -+ __ jalr(ra); -+ __ mv(t0, x10); -+ __ pop_call_clobbered_registers(); -+ __ mv(x10, t0); -+ __ bind(not_cset); -+ __ mv(result_dst, x10); -+ __ pop_reg(saved_regs, sp); -+ -+ __ bind(heap_stable); -+ __ leave(); +} + -+void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { -+ if (ShenandoahIUBarrier) { -+ __ push_call_clobbered_registers(); ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. (An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. 
++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } + -+ satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); ++ } else { ++ int non_profiled_offset = -1; ++ if (is_virtual_call) { ++ non_profiled_offset = in_bytes(CounterData::count_offset()); ++ } + -+ __ pop_call_clobbered_registers(); ++ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, ++ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); + } +} + -+// -+// Arguments: -+// -+// Inputs: -+// src: oop location to load from, might be clobbered -+// -+// Output: -+// dst: oop loaded from src location -+// -+// Kill: -+// x30 (tmp reg) -+// -+// Alias: -+// dst: x30 (might use x30 as temporary output register to avoid clobbering src) -+// -+void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, -+ DecoratorSet decorators, -+ BasicType type, -+ Register dst, -+ Address src, -+ Register tmp1, -+ Register tmp_thread) { -+ // 1: non-reference load, no additional barrier is needed -+ if (!is_reference_type(type)) { -+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -+ return; -+ } -+ -+ // 2: load a reference from src location and apply LRB if needed -+ if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { -+ Register result_dst = dst; ++void InterpreterMacroAssembler::record_item_in_profile_helper( ++ Register item, Register mdp, Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, int non_profiled_offset) { ++ int last_row = total_rows - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the item and for null. ++ // Take any of three different outcomes: ++ // 1. found item => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); + -+ // Preserve src location for LRB -+ RegSet saved_regs; -+ if (dst == src.base()) { -+ dst = (src.base() == x28) ? x29 : x28; -+ saved_regs = RegSet::of(dst); -+ __ push_reg(saved_regs, sp); -+ } -+ assert_different_registers(dst, src.base()); ++ // See if the item is item[n]. ++ int item_offset = in_bytes(item_offset_fn(row)); ++ test_mdp_data_at(mdp, item_offset, item, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the item from the CallData.) + -+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ // The item is item[n]. Increment count[n]. ++ int count_offset = in_bytes(item_count_offset_fn(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ j(done); ++ bind(next_test); + -+ load_reference_barrier(masm, dst, src, decorators); ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on item[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (non_profiled_offset >= 0) { ++ beqz(reg2, found_null); ++ // Item did not match any saved item and there is no empty row for it. 
++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, non_profiled_offset); ++ j(done); ++ bind(found_null); ++ } else { ++ bnez(reg2, done); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beqz(reg2, found_null); + -+ if (dst != result_dst) { -+ __ mv(result_dst, dst); -+ dst = result_dst; -+ } ++ // Put all the "Case 3" tests here. ++ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, ++ item_offset_fn, item_count_offset_fn, non_profiled_offset); + -+ if (saved_regs.bits() != 0) { -+ __ pop_reg(saved_regs, sp); ++ // Found a null. Keep searching for a matching item, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); + } -+ } else { -+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + } + -+ // 3: apply keep-alive barrier if needed -+ if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { -+ __ enter(); -+ __ push_call_clobbered_registers(); -+ satb_write_barrier_pre(masm /* masm */, -+ noreg /* obj */, -+ dst /* pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ true /* tosca_live */, -+ true /* expand_call */); -+ __ pop_call_clobbered_registers(); -+ __ leave(); ++ // In the fall-through case, we found no matching item, but we ++ // observed the item[start_row] is NULL. ++ // Fill in the item field and increment the count. ++ int item_offset = in_bytes(item_offset_fn(start_row)); ++ set_mdp_data_at(mdp, item_offset, item); ++ int count_offset = in_bytes(item_count_offset_fn(start_row)); ++ mv(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ j(done); + } +} + -+void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2) { -+ bool on_oop = is_reference_type(type); -+ if (!on_oop) { -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); -+ return; -+ } ++// Example state machine code for three profile rows: ++// # main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) then [ ++// row[0].incr() ++// goto done ++// ] ++// if (row[0].rec != NULL) then [ ++// # inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) then [ ++// row[1].incr() ++// goto done ++// ] ++// if (row[1].rec != NULL) then [ ++// # degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) then [ ++// row[2].incr() ++// goto done ++// ] ++// if (row[2].rec != NULL) then [ ++// count.incr() ++// goto done ++// ] # overflow ++// row[2].init(rec) ++// goto done ++// ] else [ ++// # remember row[1] is empty ++// if (row[2].rec == rec) then [ ++// row[2].incr() ++// goto done ++// ] ++// row[1].init(rec) ++// goto done ++// ] ++// else [ ++// # remember row[0] is empty ++// if (row[1].rec == rec) then [ ++// row[1].incr() ++// goto done ++// ] ++// if (row[2].rec == rec) then [ ++// row[2].incr() ++// goto done ++// ] ++// row[0].init(rec) ++// goto done ++// ] ++// done: + -+ // flatten object address if needed -+ if (dst.offset() == 0) { -+ if (dst.base() != x13) { -+ __ mv(x13, dst.base()); -+ } -+ } else { -+ __ la(x13, dst); -+ } ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; + -+ shenandoah_write_barrier_pre(masm, -+ x13 /* obj */, -+ tmp2 /* 
pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ val != noreg /* tosca_live */, -+ false /* expand_call */); ++ record_klass_in_profile_helper(receiver, mdp, reg2, done, is_virtual_call); + -+ if (val == noreg) { -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); -+ } else { -+ iu_barrier(masm, val, tmp1); -+ // G1 barrier needs uncompressed oop for region cross check. -+ Register new_val = val; -+ if (UseCompressedOops) { -+ new_val = t1; -+ __ mv(new_val, val); -+ } -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); -+ } ++ bind(done); +} + -+void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath) { -+ Label done; -+ // Resolve jobject -+ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); -+ -+ // Check for null. -+ __ beqz(obj, done); ++void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ assert(obj != t1, "need t1"); -+ Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); -+ __ lbu(t1, gc_state); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ // Check for heap in evacuation phase -+ __ andi(t0, t1, ShenandoahHeap::EVACUATION); -+ __ bnez(t0, slowpath); ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + -+ __ bind(done); -+} -+ -+// Special Shenandoah CAS implementation that handles false negatives due -+// to concurrent evacuation. The service is more complex than a -+// traditional CAS operation because the CAS operation is intended to -+// succeed if the reference at addr exactly matches expected or if the -+// reference at addr holds a pointer to a from-space object that has -+// been relocated to the location named by expected. There are two -+// races that must be addressed: -+// a) A parallel thread may mutate the contents of addr so that it points -+// to a different object. In this case, the CAS operation should fail. -+// b) A parallel thread may heal the contents of addr, replacing a -+// from-space pointer held in addr with the to-space pointer -+// representing the new location of the object. -+// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL -+// or it refers to an object that is not being evacuated out of -+// from-space, or it refers to the to-space version of an object that -+// is being evacuated out of from-space. -+// -+// By default the value held in the result register following execution -+// of the generated code sequence is 0 to indicate failure of CAS, -+// non-zero to indicate success. If is_cae, the result is the value most -+// recently fetched from addr rather than a boolean success indicator. -+// -+// Clobbers t0, t1 -+void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, -+ Register addr, -+ Register expected, -+ Register new_val, -+ Assembler::Aqrl acquire, -+ Assembler::Aqrl release, -+ bool is_cae, -+ Register result) { -+ bool is_narrow = UseCompressedOops; -+ Assembler::operand_size size = is_narrow ? 
Assembler::uint32 : Assembler::int64; -+ -+ assert_different_registers(addr, expected, t0, t1); -+ assert_different_registers(addr, new_val, t0, t1); -+ -+ Label retry, success, fail, done; ++ for (uint row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; + -+ __ bind(retry); ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); + -+ // Step1: Try to CAS. -+ __ cmpxchg(addr, expected, new_val, size, acquire, release, /* result */ t1); ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); + -+ // If success, then we are done. -+ __ beq(expected, t1, success); ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ j(profile_continue); ++ bind(next_test); ++ } + -+ // Step2: CAS failed, check the forwared pointer. -+ __ mv(t0, t1); ++ update_mdp_for_ret(return_bci); + -+ if (is_narrow) { -+ __ decode_heap_oop(t0, t0); ++ bind(profile_continue); + } -+ resolve_forward_pointer(masm, t0); -+ -+ __ encode_heap_oop(t0, t0); ++} + -+ // Report failure when the forwarded oop was not expected. -+ __ bne(t0, expected, fail); ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ // Step 3: CAS again using the forwarded oop. -+ __ cmpxchg(addr, t1, new_val, size, acquire, release, /* result */ t0); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ // Retry when failed. -+ __ bne(t0, t1, retry); ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + -+ __ bind(success); -+ if (is_cae) { -+ __ mv(result, expected); -+ } else { -+ __ addi(result, zr, 1); -+ } -+ __ j(done); ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); + -+ __ bind(fail); -+ if (is_cae) { -+ __ mv(result, t0); -+ } else { -+ __ mv(result, zr); ++ bind(profile_continue); + } -+ -+ __ bind(done); +} + -+#undef __ -+ -+#ifdef COMPILER1 -+ -+#define __ ce->masm()-> ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; + -+void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { -+ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); -+ // At this point we know that marking is in progress. -+ // If do_load() is true then we have to emit the -+ // load of the previous value; otherwise it has already -+ // been loaded into _pre_val. -+ __ bind(*stub->entry()); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ assert(stub->pre_val()->is_register(), "Precondition."); ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + -+ Register pre_val_reg = stub->pre_val()->as_register(); ++ // *Decrement* the counter. We expect to see zero or small negatives. 
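++    // (the trailing 'true' argument selects the decrement form of
++    // increment_mdp_data_at rather than an increment)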
++ increment_mdp_data_at(mdp, count_offset, true); + -+ if (stub->do_load()) { -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); ++ bind (profile_continue); + } -+ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); -+ ce->store_parameter(stub->pre_val()->as_register(), 0); -+ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); -+ __ j(*stub->continuation()); +} + -+void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, -+ ShenandoahLoadReferenceBarrierStub* stub) { -+ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); -+ __ bind(*stub->entry()); ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ DecoratorSet decorators = stub->decorators(); -+ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); -+ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); -+ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); -+ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ Register obj = stub->obj()->as_register(); -+ Register res = stub->result()->as_register(); -+ Register addr = stub->addr()->as_pointer_register(); -+ Register tmp1 = stub->tmp1()->as_register(); -+ Register tmp2 = stub->tmp2()->as_register(); ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + -+ assert(res == x10, "result must arrive in x10"); -+ assert_different_registers(tmp1, tmp2, t0); ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); + -+ if (res != obj) { -+ __ mv(res, obj); ++ bind(profile_continue); + } ++} + -+ if (is_strong) { -+ // Check for object in cset. -+ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); -+ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -+ __ add(tmp2, tmp2, tmp1); -+ __ lbu(tmp2, Address(tmp2)); -+ __ beqz(tmp2, *stub->continuation(), true /* is_far */); -+ } ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ ce->store_parameter(res, 0); -+ ce->store_parameter(addr, 1); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ if (is_strong) { -+ if (is_native) { -+ __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin())); -+ } else { -+ __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin())); -+ } -+ } else if (is_weak) { -+ __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin())); -+ } else { -+ assert(is_phantom, "only remaining strength"); -+ __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin())); -+ } ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); + -+ __ j(*stub->continuation()); ++ // The method data pointer needs to be updated. 
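++    // (update_mdp_by_offset advances mdp by the displacement recorded in
++    // the default-case cell)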
++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } +} + -+#undef __ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+#define __ sasm-> ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { -+ __ prologue("shenandoah_pre_barrier", false); ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ mvw(reg2, in_bytes(MultiBranchData::per_case_size())); ++ mvw(t0, in_bytes(MultiBranchData::case_array_offset())); ++ Assembler::mul(index, index, reg2); ++ Assembler::add(index, index, t0); + -+ // arg0 : previous value of memory ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); + -+ BarrierSet* bs = BarrierSet::barrier_set(); ++ // The method data pointer need to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); + -+ const Register pre_val = x10; -+ const Register thread = xthread; -+ const Register tmp = t0; ++ bind(profile_continue); ++ } ++} + -+ Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } + -+ Label done; -+ Label runtime; ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); ++ beqz(x13, L); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } + -+ // Is marking still active? -+ Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); -+ __ lb(tmp, gc_state); -+ __ andi(tmp, tmp, ShenandoahHeap::MARKING); -+ __ beqz(tmp, done); ++ { ++ SkipIfEqual skip(this, &DTraceMethodProbes, false); ++ get_method(c_rarg1); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ xthread, c_rarg1); ++ } + -+ // Can we store original value in the thread's buffer? 
-+ __ ld(tmp, queue_index); -+ __ beqz(tmp, runtime); ++ // RedefineClasses() tracing support for obsolete method entry ++ if (log_is_enabled(Trace, redefine, class, obsolete)) { ++ get_method(c_rarg1); ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), ++ xthread, c_rarg1); ++ } ++} + -+ __ sub(tmp, tmp, wordSize); -+ __ sd(tmp, queue_index); -+ __ ld(t1, buffer); -+ __ add(tmp, tmp, t1); -+ __ load_parameter(0, t1); -+ __ sd(t1, Address(tmp, 0)); -+ __ j(done); + -+ __ bind(runtime); -+ __ push_call_clobbered_registers(); -+ __ load_parameter(0, pre_val); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); -+ __ pop_call_clobbered_registers(); -+ __ bind(done); ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. + -+ __ epilogue(); ++ // template interpreter will leave the result on the top of the stack. ++ push(state); ++ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); ++ beqz(x13, L); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(L); ++ pop(state); ++ } ++ ++ { ++ SkipIfEqual skip(this, &DTraceMethodProbes, false); ++ push(state); ++ get_method(c_rarg1); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ xthread, c_rarg1); ++ pop(state); ++ } +} + -+void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, -+ DecoratorSet decorators) { -+ __ prologue("shenandoah_load_reference_barrier", false); -+ // arg0 : object to be resolved + -+ __ push_call_clobbered_registers(); -+ __ load_parameter(0, x10); -+ __ load_parameter(1, x11); ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. 
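++// The condition is "masked value == 0": the updated counter is stored back,
++// ANDed with the mask, and control transfers to *where only when the result
++// is zero (a far j is used because *where may be out of branch range).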
++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register tmp1, Register tmp2, ++ bool preloaded, Label* where) { ++ Label done; ++ if (!preloaded) { ++ lwu(tmp1, counter_addr); ++ } ++ add(tmp1, tmp1, increment); ++ sw(tmp1, counter_addr); ++ lwu(tmp2, mask); ++ andr(tmp1, tmp1, tmp2); ++ bnez(tmp1, done); ++ j(*where); // offset is too large so we have to use j instead of beqz here ++ bind(done); ++} + -+ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); -+ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); -+ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); -+ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); -+ if (is_strong) { -+ if (is_native) { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); -+ } else { -+ if (UseCompressedOops) { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); -+ } else { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); -+ } -+ } -+ } else if (is_weak) { -+ assert(!is_native, "weak must not be called off-heap"); -+ if (UseCompressedOops) { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); -+ } else { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); -+ } -+ } else { -+ assert(is_phantom, "only remaining strength"); -+ assert(is_native, "phantom must only be called off-heap"); -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_phantom); ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore rbcp & rlocals pointer since these ++ // are callee saved registers and no blocking/ GC can happen ++ // in leaf calls. ++#ifdef ASSERT ++ { ++ Label L; ++ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ beqz(t0, L); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base:" ++ " last_sp != NULL"); ++ bind(L); + } -+ __ jalr(ra); -+ __ mv(t0, x10); -+ __ pop_call_clobbered_registers(); -+ __ mv(x10, t0); ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++} + -+ __ epilogue(); ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. 
++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ beqz(t0, L); ++ stop("InterpreterMacroAssembler::call_VM_base:" ++ " last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++// interpreter specific ++ restore_bcp(); ++ restore_locals(); +} + -+#undef __ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) { ++ assert_different_registers(obj, tmp, t0, mdo_addr.base()); ++ Label update, next, none; + -+#endif // COMPILER1 -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -new file mode 100644 -index 00000000000..a705f497667 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,88 @@ -+/* -+ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ verify_oop(obj); + -+#ifndef CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP ++ bnez(obj, update); ++ orptr(mdo_addr, TypeEntries::null_seen, t0, tmp); ++ j(next); + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "gc/shenandoah/shenandoahBarrierSet.hpp" -+#ifdef COMPILER1 -+class LIR_Assembler; -+class ShenandoahPreBarrierStub; -+class ShenandoahLoadReferenceBarrierStub; -+class StubAssembler; -+#endif -+class StubCodeGenerator; ++ bind(update); ++ load_klass(obj, obj); + -+class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { -+private: ++ ld(t0, mdo_addr); ++ xorr(obj, obj, t0); ++ andi(t0, obj, TypeEntries::type_klass_mask); ++ beqz(t0, next); // klass seen before, nothing to ++ // do. The unknown bit may have been ++ // set already but no need to check. 
+ -+ void satb_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call); -+ void shenandoah_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call); ++ andi(t0, obj, TypeEntries::type_unknown); ++ bnez(t0, next); ++ // already unknown. Nothing to do anymore. + -+ void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); -+ void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); -+ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators); ++ ld(t0, mdo_addr); ++ beqz(t0, none); ++ li(tmp, (u1)TypeEntries::null_seen); ++ beq(t0, tmp, none); ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ ld(t0, mdo_addr); ++ xorr(obj, obj, t0); ++ andi(t0, obj, TypeEntries::type_klass_mask); ++ beqz(t0, next); + -+public: ++ // different than before. Cannot keep accurate profile. ++ orptr(mdo_addr, TypeEntries::type_unknown, t0, tmp); ++ j(next); + -+ void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); ++ bind(none); ++ // first time here. Set profile type. ++ sd(obj, mdo_addr); + -+#ifdef COMPILER1 -+ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); -+ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); -+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); -+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); -+#endif ++ bind(next); ++} + -+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs); ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } + -+ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread); -+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2); ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; + -+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath); ++ test_method_data_pointer(mdp, profile_continue); + -+ void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); -+}; ++ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + -+#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -new file mode 100644 -index 00000000000..6c855f23c2a ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -@@ -0,0 +1,285 @@ -+// -+// Copyright (c) 2018, Red Hat, Inc. All rights reserved. 
-+// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. -+// -+// -+ -+source_hpp %{ -+#include "gc/shenandoah/shenandoahBarrierSet.hpp" -+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -+%} -+ -+instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); -+ -+ effect(TEMP tmp, KILL cr); -+ -+ format %{ -+ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapP_shenandoah" -+ %} -+ -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); -+ -+ effect(TEMP tmp, KILL cr); -+ -+ format %{ -+ "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapN_shenandoah" -+ %} -+ -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_reserved(n)); -+ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); -+ -+ effect(TEMP tmp, KILL cr); -+ -+ format %{ -+ "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapPAcq_shenandoah" -+ %} -+ -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
-+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_reserved(n)); -+ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); ++ if (is_virtual) { ++ li(tmp, (u1)DataLayout::virtual_call_type_data_tag); ++ bne(t0, tmp, profile_continue); ++ } else { ++ li(tmp, (u1)DataLayout::call_type_data_tag); ++ bne(t0, tmp, profile_continue); ++ } + -+ effect(TEMP tmp, KILL cr); ++ // calculate slot step ++ static int stack_slot_offset0 = in_bytes(TypeEntriesAtCall::stack_slot_offset(0)); ++ static int slot_step = in_bytes(TypeEntriesAtCall::stack_slot_offset(1)) - stack_slot_offset0; + -+ format %{ -+ "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapNAcq_shenandoah" -+ %} ++ // calculate type step ++ static int argument_type_offset0 = in_bytes(TypeEntriesAtCall::argument_type_offset(0)); ++ static int type_step = in_bytes(TypeEntriesAtCall::argument_type_offset(1)) - argument_type_offset0; + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++ if (MethodData::profile_arguments()) { ++ Label done, loop, loopEnd, profileArgument, profileReturnType; ++ RegSet pushed_registers; ++ pushed_registers += x15; ++ pushed_registers += x16; ++ pushed_registers += x17; ++ Register mdo_addr = x15; ++ Register index = x16; ++ Register off_to_args = x17; ++ push_reg(pushed_registers, sp); + -+ ins_pipe(pipe_slow); -+%} ++ mv(off_to_args, in_bytes(TypeEntriesAtCall::args_data_offset())); ++ mv(t0, TypeProfileArgsLimit); ++ beqz(t0, loopEnd); + -+instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); -+ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ mv(index, zr); // index < TypeProfileArgsLimit ++ bind(loop); ++ bgtz(index, profileReturnType); ++ li(t0, (int)MethodData::profile_return()); ++ beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false ++ bind(profileReturnType); ++ // If return value type is profiled we may have no argument to profile ++ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); ++ mv(t1, - TypeStackSlotEntries::per_arg_count()); ++ mul(t1, index, t1); ++ add(tmp, tmp, t1); ++ li(t1, TypeStackSlotEntries::per_arg_count()); ++ add(t0, mdp, off_to_args); ++ blt(tmp, t1, done); + -+ format %{ -+ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah" -+ %} ++ bind(profileArgument); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
-+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ true /* is_cae */, $res$$Register); -+ %} ++ ld(tmp, Address(callee, Method::const_offset())); ++ load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ li(t0, stack_slot_offset0); ++ li(t1, slot_step); ++ mul(t1, index, t1); ++ add(t0, t0, t1); ++ add(t0, mdp, t0); ++ ld(t0, Address(t0)); ++ sub(tmp, tmp, t0); ++ addi(tmp, tmp, -1); ++ Address arg_addr = argument_address(tmp); ++ ld(tmp, arg_addr); + -+ ins_pipe(pipe_slow); -+%} ++ li(t0, argument_type_offset0); ++ li(t1, type_step); ++ mul(t1, index, t1); ++ add(t0, t0, t1); ++ add(mdo_addr, mdp, t0); ++ Address mdo_arg_addr(mdo_addr, 0); ++ profile_obj_type(tmp, mdo_arg_addr, t1); + -+instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ addi(off_to_args, off_to_args, to_add); + -+ effect(TEMP_DEF res, TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah" -+ %} ++ // increment index by 1 ++ addi(index, index, 1); ++ li(t1, TypeProfileArgsLimit); ++ blt(index, t1, loop); ++ bind(loopEnd); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ true /* is_cae */, $res$$Register); -+ %} ++ if (MethodData::profile_return()) { ++ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); ++ addi(tmp, tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); ++ } + -+ ins_pipe(pipe_slow); -+%} ++ add(t0, mdp, off_to_args); ++ bind(done); ++ mv(mdp, t0); + -+instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ // unspill the clobbered registers ++ pop_reg(pushed_registers, sp); + -+ effect(TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah" -+ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" -+ %} ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size)); ++ } ++ sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one + -+ ins_pipe(pipe_slow); -+%} ++ bind(profile_continue); ++ } ++} + -+instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_reserved(n)); -+ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, xbcp, t0, t1); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; + -+ effect(TEMP_DEF res, TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq_shenandoah" -+ %} ++ test_method_data_pointer(mdp, profile_continue); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ true /* is_cae */, $res$$Register); -+ %} ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + -+ ins_pipe(pipe_slow); -+%} ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. 
We can't go back to the ++ // begining of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't known its ++ // length ++ Label do_profile; ++ lbu(t0, Address(xbcp, 0)); ++ li(tmp, (u1)Bytecodes::_invokedynamic); ++ beq(t0, tmp, do_profile); ++ li(tmp, (u1)Bytecodes::_invokehandle); ++ beq(t0, tmp, do_profile); ++ get_method(tmp); ++ lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); ++ li(t1, vmIntrinsics::_compiledLambdaForm); ++ bne(t0, t1, profile_continue); ++ bind(do_profile); ++ } + -+instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_reserved(n)); -+ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ mv(tmp, ret); ++ profile_obj_type(tmp, mdo_ret_addr, t1); + -+ effect(TEMP_DEF res, TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq_shenandoah" -+ %} ++ bind(profile_continue); ++ } ++} + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ true /* is_cae */, $res$$Register); -+ %} ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3) { ++ assert_different_registers(t0, t1, mdp, tmp1, tmp2, tmp3); ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; + -+ ins_pipe(pipe_slow); -+%} ++ test_method_data_pointer(mdp, profile_continue); + -+instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ lwu(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()))); ++ srli(tmp2, tmp1, 31); ++ bnez(tmp2, profile_continue); // i.e. sign bit set + -+ effect(TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah" -+ %} ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. ++ // mdo start + parameters offset + array length - 1 ++ add(mdp, mdp, tmp1); ++ ld(tmp1, Address(mdp, ArrayData::array_len_offset())); ++ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
-+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++ Label loop; ++ bind(loop); + -+ ins_pipe(pipe_slow); -+%} ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ int per_arg_scale = exact_log2(DataLayout::cell_size); ++ add(t0, mdp, off_base); ++ add(t1, mdp, type_base); + -+instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_reserved(n)); -+ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ shadd(tmp2, tmp1, t0, tmp2, per_arg_scale); ++ // load offset on the stack from the slot for this parameter ++ ld(tmp2, Address(tmp2, 0)); ++ neg(tmp2, tmp2); + -+ effect(TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapNAcq_shenandoah" -+ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" -+ %} ++ // read the parameter from the local area ++ shadd(tmp2, tmp2, xlocals, tmp2, Interpreter::logStackElementSize); ++ ld(tmp2, Address(tmp2, 0)); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++ // profile the parameter ++ shadd(t1, tmp1, t1, t0, per_arg_scale); ++ Address arg_type(t1, 0); ++ profile_obj_type(tmp2, arg_type, tmp3); + -+ ins_pipe(pipe_slow); -+%} ++ // go to next parameter ++ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); ++ bgez(tmp1, loop); + -+instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_reserved(n)); -+ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ bind(profile_continue); ++ } ++} + -+ effect(TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapPAcq_shenandoah" -+ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" -+ %} ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld(mcs, Address(method, Method::method_counters_offset())); ++ bnez(mcs, has_counters); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld(mcs, Address(method, Method::method_counters_offset())); ++ beqz(mcs, skip); // No MethodCounters allocated, OutOfMemory ++ bind(has_counters); ++} + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
-+ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++#ifdef ASSERT ++void InterpreterMacroAssembler::verify_access_flags(Register access_flags, uint32_t flag_bits, ++ const char* msg, bool stop_by_hit) { ++ Label L; ++ andi(t0, access_flags, flag_bits); ++ if (stop_by_hit) { ++ beqz(t0, L); ++ } else { ++ bnez(t0, L); ++ } ++ stop(msg); ++ bind(L); ++} + -+ ins_pipe(pipe_slow); -+%} -diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp ++void InterpreterMacroAssembler::verify_frame_setup() { ++ Label L; ++ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ld(t0, monitor_block_top); ++ beq(esp, t0, L); ++ stop("broken stack frame setup in interpreter"); ++ bind(L); ++} ++#endif +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp new file mode 100644 -index 00000000000..3d3f4d4d774 +index 0000000000..4126e8ee70 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,441 @@ ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +@@ -0,0 +1,283 @@ +/* -+ * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -18106,430 +17798,573 @@ index 00000000000..3d3f4d4d774 + * + */ + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "code/codeBlob.hpp" -+#include "code/vmreg.inline.hpp" -+#include "gc/z/zBarrier.inline.hpp" -+#include "gc/z/zBarrierSet.hpp" -+#include "gc/z/zBarrierSetAssembler.hpp" -+#include "gc/z/zBarrierSetRuntime.hpp" -+#include "gc/z/zThreadLocalData.hpp" -+#include "memory/resourceArea.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "utilities/macros.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_LIRAssembler.hpp" -+#include "c1/c1_MacroAssembler.hpp" -+#include "gc/z/c1/zBarrierSetC1.hpp" -+#endif // COMPILER1 -+#ifdef COMPILER2 -+#include "gc/z/c2/zBarrierSetC2.hpp" -+#endif // COMPILER2 -+ -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#else -+#define BLOCK_COMMENT(str) __ block_comment(str) -+#endif ++#ifndef CPU_RISCV_INTERP_MASM_RISCV_HPP ++#define CPU_RISCV_INTERP_MASM_RISCV_HPP + -+#undef __ -+#define __ masm-> ++#include "asm/macroAssembler.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" + -+void ZBarrierSetAssembler::load_at(MacroAssembler* masm, -+ DecoratorSet decorators, -+ BasicType type, -+ Register dst, -+ Address src, -+ Register tmp1, -+ Register tmp_thread) { -+ if (!ZBarrierSet::barrier_needed(decorators, type)) { -+ // Barrier not needed -+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -+ return; -+ } ++// This file specializes the assember with interpreter-specific macros + -+ assert_different_registers(t1, src.base()); -+ assert_different_registers(t0, t1, dst); ++typedef ByteSize (*OffsetFunction)(uint); + -+ Label done; ++class InterpreterMacroAssembler: public MacroAssembler { ++ protected: ++ // Interpreter specific version of call_VM_base ++ using MacroAssembler::call_VM_leaf_base; + -+ // Load bad mask into temp register. -+ __ la(t0, src); -+ __ ld(t1, address_bad_mask_from_thread(xthread)); -+ __ ld(dst, Address(t0)); ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); + -+ // Test reference against bad mask. If mask bad, then we need to fix it up. -+ __ andr(t1, dst, t1); -+ __ beqz(t1, done); ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); + -+ __ enter(); ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, ++ bool generate_poll = false, Register Rs = t0); + -+ __ push_call_clobbered_registers_except(RegSet::of(dst)); ++ public: ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {} ++ virtual ~InterpreterMacroAssembler() {} + -+ if (c_rarg0 != dst) { -+ __ mv(c_rarg0, dst); -+ } ++ void load_earlyret_value(TosState state); + -+ __ mv(c_rarg1, t0); ++ void jump_to_entry(address entry); + -+ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); + -+ // Make sure dst has the return value. 
-+ if (dst != x10) { -+ __ mv(dst, x10); ++ // Interpreter-specific registers ++ void save_bcp() { ++ sd(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); + } + -+ __ pop_call_clobbered_registers_except(RegSet::of(dst)); -+ __ leave(); ++ void restore_bcp() { ++ ld(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); ++ } + -+ __ bind(done); -+} ++ void restore_locals() { ++ ld(xlocals, Address(fp, frame::interpreter_frame_locals_offset * wordSize)); ++ } + -+#ifdef ASSERT ++ void restore_constant_pool_cache() { ++ ld(xcpool, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); ++ } + -+void ZBarrierSetAssembler::store_at(MacroAssembler* masm, -+ DecoratorSet decorators, -+ BasicType type, -+ Address dst, -+ Register val, -+ Register tmp1, -+ Register tmp2) { -+ // Verify value -+ if (is_reference_type(type)) { -+ // Note that src could be noreg, which means we -+ // are storing null and can skip verification. -+ if (val != noreg) { -+ Label done; ++ void get_dispatch(); + -+ // tmp1 and tmp2 are often set to noreg. -+ RegSet savedRegs = RegSet::of(t0); -+ __ push_reg(savedRegs, sp); ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld(reg, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++ } + -+ __ ld(t0, address_bad_mask_from_thread(xthread)); -+ __ andr(t0, val, t0); -+ __ beqz(t0, done); -+ __ stop("Verify oop store failed"); -+ __ should_not_reach_here(); -+ __ bind(done); -+ __ pop_reg(savedRegs, sp); -+ } ++ void get_const(Register reg) { ++ get_method(reg); ++ ld(reg, Address(reg, in_bytes(Method::const_offset()))); + } + -+ // Store value -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); -+} ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld(reg, Address(reg, in_bytes(ConstMethod::constants_offset()))); ++ } + -+#endif // ASSERT ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); ++ } + -+void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, -+ DecoratorSet decorators, -+ bool is_oop, -+ Register src, -+ Register dst, -+ Register count, -+ RegSet saved_regs) { -+ if (!is_oop) { -+ // Barrier not needed -+ return; ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); + } + -+ BLOCK_COMMENT("ZBarrierSetAssembler::arraycopy_prologue {"); ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // Load cpool->resolved_references(index). ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp = x15); + -+ assert_different_registers(src, count, t0); ++ // Load cpool->resolved_klass_at(index). 
++ void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); + -+ __ push_reg(saved_regs, sp); ++ void pop_ptr(Register r = x10); ++ void pop_i(Register r = x10); ++ void pop_l(Register r = x10); ++ void pop_f(FloatRegister r = f10); ++ void pop_d(FloatRegister r = f10); ++ void push_ptr(Register r = x10); ++ void push_i(Register r = x10); ++ void push_l(Register r = x10); ++ void push_f(FloatRegister r = f10); ++ void push_d(FloatRegister r = f10); + -+ if (count == c_rarg0 && src == c_rarg1) { -+ // exactly backwards!! -+ __ xorr(c_rarg0, c_rarg0, c_rarg1); -+ __ xorr(c_rarg1, c_rarg0, c_rarg1); -+ __ xorr(c_rarg0, c_rarg0, c_rarg1); -+ } else { -+ __ mv(c_rarg0, src); -+ __ mv(c_rarg1, count); ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld(esp, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); ++ // NULL last_sp until next java call ++ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + } + -+ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_array_addr(), 2); ++ // Helpers for swap and dup ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); + -+ __ pop_reg(saved_regs, sp); ++ // Load float value from 'address'. The value is loaded onto the FPU register v0. ++ void load_float(Address src); ++ void load_double(Address src); + -+ BLOCK_COMMENT("} ZBarrierSetAssembler::arraycopy_prologue"); -+} ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); + -+void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, -+ Register jni_env, -+ Register robj, -+ Register tmp, -+ Label& slowpath) { -+ BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_jobject_in_native {"); ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ // dispatch via t0 ++ void dispatch_only(TosState state, bool generate_poll = false, Register Rs = t0); ++ // dispatch normal table via t0 (assume t0 is loaded already) ++ void dispatch_only_normal(TosState state, Register Rs = t0); ++ void dispatch_only_noverify(TosState state, Register Rs = t0); ++ // load t0 from [xbcp + step] and dispatch via t0 ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ // load t0 from [xbcp] and dispatch via t0 and table ++ void dispatch_via (TosState state, address* table); + -+ assert_different_registers(jni_env, robj, tmp); ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method); + -+ // Resolve jobject -+ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, robj, tmp, slowpath); + -+ // Compute the offset of address bad mask from the field of jni_environment -+ long int bad_mask_relative_offset = (long int) (in_bytes(ZThreadLocalData::address_bad_mask_offset()) - -+ in_bytes(JavaThread::jni_environment_offset())); ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. 
++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. ++ void remove_activation(TosState state, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); + -+ // Load the address bad mask -+ __ ld(tmp, Address(jni_env, bad_mask_relative_offset)); ++ // FIXME: Give us a valid frame at a null check. ++ virtual void null_check(Register reg, int offset = -1) { ++ MacroAssembler::null_check(reg, offset); ++ } + -+ // Check address bad mask -+ __ andr(tmp, robj, tmp); -+ __ bnez(tmp, slowpath); ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); + -+ BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native"); -+} ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); + -+#ifdef COMPILER2 ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register tmp1, Register tmp2, ++ bool preloaded, Label* where); + -+OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { -+ if (!OptoReg::is_reg(opto_reg)) { -+ return OptoReg::Bad; -+ } ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); + -+ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); -+ if (vm_reg->is_FloatRegister()) { -+ return opto_reg & ~1; -+ } ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, ++ Label& done, bool is_virtual_call); ++ void record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset); + -+ return opto_reg; -+} ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); + -+#undef __ -+#define __ _masm-> ++ // narrow int return value ++ void narrow(Register result); + -+class ZSaveLiveRegisters { -+private: -+ MacroAssembler* const _masm; -+ RegSet _gp_regs; -+ FloatRegSet _fp_regs; -+ VectorRegSet _vp_regs; ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register t1, ++ bool receiver_can_be_null = false); ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void 
profile_typecheck(Register mdp, Register klass, Register temp); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register temp); + -+public: -+ void initialize(ZLoadBarrierStubC2* stub) { -+ // Record registers that needs to be saved/restored -+ RegMaskIterator rmi(stub->live()); -+ while (rmi.has_next()) { -+ const OptoReg::Name opto_reg = rmi.next(); -+ if (OptoReg::is_reg(opto_reg)) { -+ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); -+ if (vm_reg->is_Register()) { -+ _gp_regs += RegSet::of(vm_reg->as_Register()); -+ } else if (vm_reg->is_FloatRegister()) { -+ _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); -+ } else if (vm_reg->is_VectorRegister()) { -+ const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~(VectorRegisterImpl::max_slots_per_register - 1)); -+ _vp_regs += VectorRegSet::of(vm_reg_base->as_VectorRegister()); -+ } else { -+ fatal("Unknown register type"); -+ } -+ } -+ } ++ void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3); + -+ // Remove C-ABI SOE registers, tmp regs and _ref register that will be updated -+ _gp_regs -= RegSet::range(x18, x27) + RegSet::of(x2) + RegSet::of(x8, x9) + RegSet::of(x5, stub->ref()); -+ } ++ // Debugging ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); + -+ ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : -+ _masm(masm), -+ _gp_regs(), -+ _fp_regs(), -+ _vp_regs() { -+ // Figure out what registers to save/restore -+ initialize(stub); ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; + -+ // Save registers -+ __ push_reg(_gp_regs, sp); -+ __ push_fp(_fp_regs, sp); -+ __ push_vp(_vp_regs, sp); -+ } ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); + -+ ~ZSaveLiveRegisters() { -+ // Restore registers -+ __ pop_vp(_vp_regs, sp); -+ __ pop_fp(_fp_regs, sp); -+ __ pop_reg(_gp_regs, sp); ++ virtual void _call_Unimplemented(address call_site) { ++ save_bcp(); ++ set_last_Java_frame(esp, fp, (address) pc(), t0); ++ MacroAssembler::_call_Unimplemented(call_site); + } ++ ++#ifdef ASSERT ++ void verify_access_flags(Register access_flags, uint32_t flag_bits, ++ const char* msg, bool stop_by_hit = true); ++ void verify_frame_setup(); ++#endif +}; + -+class ZSetupArguments { -+private: -+ MacroAssembler* const _masm; -+ const Register _ref; -+ const Address _ref_addr; ++#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +new file mode 100644 +index 0000000000..776b078723 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +@@ -0,0 +1,295 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+public: -+ ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : -+ _masm(masm), -+ _ref(stub->ref()), -+ _ref_addr(stub->ref_addr()) { -+ -+ // Setup arguments -+ if (_ref_addr.base() == noreg) { -+ // No self healing -+ if (_ref != c_rarg0) { -+ __ mv(c_rarg0, _ref); -+ } -+ __ mv(c_rarg1, zr); -+ } else { -+ // Self healing -+ if (_ref == c_rarg0) { -+ // _ref is already at correct place -+ __ la(c_rarg1, _ref_addr); -+ } else if (_ref != c_rarg1) { -+ // _ref is in wrong place, but not in c_rarg1, so fix it first -+ __ la(c_rarg1, _ref_addr); -+ __ mv(c_rarg0, _ref); -+ } else if (_ref_addr.base() != c_rarg0) { -+ assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0"); -+ __ mv(c_rarg0, _ref); -+ __ la(c_rarg1, _ref_addr); -+ } else { -+ assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0"); -+ if (_ref_addr.base() == c_rarg0) { -+ __ mv(t1, c_rarg1); -+ __ la(c_rarg1, _ref_addr); -+ __ mv(c_rarg0, t1); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+ } ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++// Implementation of SignatureHandlerGenerator ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return xlocals; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return sp; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return t0; } ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::next_gpr() { ++ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { ++ return g_INTArgReg[++_num_reg_int_args]; + } ++ return noreg; ++} + -+ ~ZSetupArguments() { -+ // Transfer result -+ if (_ref != x10) { -+ __ mv(_ref, x10); -+ } ++FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() { ++ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { ++ return g_FPArgReg[_num_reg_fp_args++]; ++ } else { ++ return fnoreg; + } -+}; ++} + -+#undef __ -+#define __ masm-> ++int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() { ++ int ret = _stack_offset; ++ _stack_offset += wordSize; ++ return ret; ++} + -+void 
ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { -+ BLOCK_COMMENT("ZLoadBarrierStubC2"); ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); // allocate on resourse area by default ++ _num_reg_int_args = (method->is_static() ? 1 : 0); ++ _num_reg_fp_args = 0; ++ _stack_offset = 0; ++} + -+ // Stub entry -+ __ bind(*stub->entry()); ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + -+ { -+ ZSaveLiveRegisters save_live_registers(masm, stub); -+ ZSetupArguments setup_arguments(masm, stub); -+ int32_t offset = 0; -+ __ la_patchable(t0, stub->slow_path(), offset); -+ __ jalr(x1, t0, offset); ++ Register reg = next_gpr(); ++ if (reg != noreg) { ++ __ lw(reg, src); ++ } else { ++ __ lw(x10, src); ++ __ sw(x10, Address(to(), next_stack_offset())); + } -+ -+ // Stub exit -+ __ j(*stub->continuation()); +} + -+#undef __ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); + -+#endif // COMPILER2 ++ Register reg = next_gpr(); ++ if (reg != noreg) { ++ __ ld(reg, src); ++ } else { ++ __ ld(x10, src); ++ __ sd(x10, Address(to(), next_stack_offset())); ++ } ++} + -+#ifdef COMPILER1 -+#undef __ -+#define __ ce->masm()-> ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + -+void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, -+ LIR_Opr ref) const { -+ assert_different_registers(xthread, ref->as_register(), t1); -+ __ ld(t1, address_bad_mask_from_thread(xthread)); -+ __ andr(t1, t1, ref->as_register()); ++ FloatRegister reg = next_fpr(); ++ if (reg != fnoreg) { ++ __ flw(reg, src); ++ } else { ++ // a floating-point argument is passed according to the integer calling ++ // convention if no floating-point argument register available ++ pass_int(); ++ } +} + -+void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce, -+ ZLoadBarrierStubC1* stub) const { -+ // Stub entry -+ __ bind(*stub->entry()); ++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); + -+ Register ref = stub->ref()->as_register(); -+ Register ref_addr = noreg; -+ Register tmp = noreg; ++ FloatRegister reg = next_fpr(); ++ if (reg != fnoreg) { ++ __ fld(reg, src); ++ } else { ++ // a floating-point argument is passed according to the integer calling ++ // convention if no floating-point argument register available ++ pass_long(); ++ } ++} + -+ if (stub->tmp()->is_valid()) { -+ // Load address into tmp register -+ ce->leal(stub->ref_addr(), stub->tmp()); -+ ref_addr = tmp = stub->tmp()->as_pointer_register(); ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ Register reg = next_gpr(); ++ if (reg == c_rarg1) { ++ assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); ++ __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); ++ } else if (reg != noreg) { ++ // c_rarg2-c_rarg7 ++ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ mv(reg, zr); //_num_reg_int_args:c_rarg -> 1:c_rarg2, 2:c_rarg3... 
++ __ ld(temp(), x10); ++ Label L; ++ __ beqz(temp(), L); ++ __ mv(reg, x10); ++ __ bind(L); + } else { -+ // Address already in register -+ ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register(); ++ //to stack ++ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ ld(temp(), x10); ++ Label L; ++ __ bnez(temp(), L); ++ __ mv(x10, zr); ++ __ bind(L); ++ assert(sizeof(jobject) == wordSize, ""); ++ __ sd(x10, Address(to(), next_stack_offset())); + } ++} + -+ assert_different_registers(ref, ref_addr, noreg); ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); + -+ // Save x10 unless it is the result or tmp register -+ // Set up SP to accomodate parameters and maybe x10. -+ if (ref != x10 && tmp != x10) { -+ __ sub(sp, sp, 32); -+ __ sd(x10, Address(sp, 16)); -+ } else { -+ __ sub(sp, sp, 16); ++ // return result handler ++ __ la(x10, ExternalAddress(Interpreter::result_handler(method()->result_type()))); ++ __ ret(); ++ ++ __ flush(); ++} ++ ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _int_args; ++ intptr_t* _fp_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_reg_int_args; ++ unsigned int _num_reg_fp_args; ++ ++ intptr_t* single_slot_addr() { ++ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ return from_addr; + } + -+ // Setup arguments and call runtime stub -+ ce->store_parameter(ref_addr, 1); -+ ce->store_parameter(ref, 0); ++ intptr_t* double_slot_addr() { ++ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ return from_addr; ++ } + -+ __ far_call(stub->runtime_stub()); ++ int pass_gpr(intptr_t value) { ++ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { ++ *_int_args++ = value; ++ return _num_reg_int_args++; ++ } ++ return -1; ++ } + -+ // Verify result -+ __ verify_oop(x10, "Bad oop"); ++ int pass_fpr(intptr_t value) { ++ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { ++ *_fp_args++ = value; ++ return _num_reg_fp_args++; ++ } ++ return -1; ++ } + ++ void pass_stack(intptr_t value) { ++ *_to++ = value; ++ } + -+ // Move result into place -+ if (ref != x10) { -+ __ mv(ref, x10); ++ virtual void pass_int() { ++ jint value = *(jint*)single_slot_addr(); ++ if (pass_gpr(value) < 0) { ++ pass_stack(value); ++ } + } + -+ // Restore x10 unless it is the result or tmp register -+ if (ref != x10 && tmp != x10) { -+ __ ld(x10, Address(sp, 16)); -+ __ add(sp, sp, 32); -+ } else { -+ __ add(sp, sp, 16); ++ virtual void pass_long() { ++ intptr_t value = *double_slot_addr(); ++ if (pass_gpr(value) < 0) { ++ pass_stack(value); ++ } + } + -+ // Stub exit -+ __ j(*stub->continuation()); -+} ++ virtual void pass_object() { ++ intptr_t* addr = single_slot_addr(); ++ intptr_t value = *addr == 0 ? 
NULL : (intptr_t)addr; ++ if (pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } + -+#undef __ -+#define __ sasm-> ++ virtual void pass_float() { ++ jint value = *(jint*) single_slot_addr(); ++ // a floating-point argument is passed according to the integer calling ++ // convention if no floating-point argument register available ++ if (pass_fpr(value) < 0 && pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } ++ ++ virtual void pass_double() { ++ intptr_t value = *double_slot_addr(); ++ int arg = pass_fpr(value); ++ if (0 <= arg) { ++ *_fp_identifiers |= (1ull << arg); // mark as double ++ } else if (pass_gpr(value) < 0) { // no need to mark if passing by integer registers or stack ++ pass_stack(value); ++ } ++ } + -+void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, -+ DecoratorSet decorators) const { -+ __ prologue("zgc_load_barrier stub", false); ++ public: ++ SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; + -+ __ push_call_clobbered_registers_except(RegSet::of(x10)); ++ _int_args = to - (method->is_static() ? 16 : 17); ++ _fp_args = to - 8; ++ _fp_identifiers = to - 9; ++ *(int*) _fp_identifiers = 0; ++ _num_reg_int_args = (method->is_static() ? 1 : 0); ++ _num_reg_fp_args = 0; ++ } + -+ // Setup arguments -+ __ load_parameter(0, c_rarg0); -+ __ load_parameter(1, c_rarg1); ++ ~SlowSignatureHandler() ++ { ++ _from = NULL; ++ _to = NULL; ++ _int_args = NULL; ++ _fp_args = NULL; ++ _fp_identifiers = NULL; ++ } ++}; + -+ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); + -+ __ pop_call_clobbered_registers_except(RegSet::of(x10)); ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); + -+ __ epilogue(); -+} ++ // handle arguments ++ SlowSignatureHandler ssh(m, (address)from, to); ++ ssh.iterate(UCONST64(-1)); + -+#undef __ -+#endif // COMPILER1 -diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp new file mode 100644 -index 00000000000..dc07ab635fe +index 0000000000..05df63ba2a --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,101 @@ ++++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp +@@ -0,0 +1,68 @@ +/* -+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -18553,91 +18388,57 @@ index 00000000000..dc07ab635fe + * + */ + -+#ifndef CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP ++#ifndef CPU_RISCV_INTERPRETERRT_RISCV_HPP ++#define CPU_RISCV_INTERPRETERRT_RISCV_HPP + -+#include "code/vmreg.hpp" -+#include "oops/accessDecorators.hpp" -+#ifdef COMPILER2 -+#include "opto/optoreg.hpp" -+#endif // COMPILER2 -+ -+#ifdef COMPILER1 -+class LIR_Assembler; -+class LIR_Opr; -+class StubAssembler; -+class ZLoadBarrierStubC1; -+#endif // COMPILER1 -+ -+#ifdef COMPILER2 -+class Node; -+class ZLoadBarrierStubC2; -+#endif // COMPILER2 -+ -+class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { -+public: -+ virtual void load_at(MacroAssembler* masm, -+ DecoratorSet decorators, -+ BasicType type, -+ Register dst, -+ Address src, -+ Register tmp1, -+ Register tmp_thread); -+ -+#ifdef ASSERT -+ virtual void store_at(MacroAssembler* masm, -+ DecoratorSet decorators, -+ BasicType type, -+ Address dst, -+ Register val, -+ Register tmp1, -+ Register tmp2); -+#endif // ASSERT ++// This is included in the middle of class Interpreter. ++// Do not include files here. + -+ virtual void arraycopy_prologue(MacroAssembler* masm, -+ DecoratorSet decorators, -+ bool is_oop, -+ Register src, -+ Register dst, -+ Register count, -+ RegSet saved_regs); ++// native method calls + -+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, -+ Register jni_env, -+ Register robj, -+ Register tmp, -+ Label& slowpath); ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ unsigned int _num_reg_fp_args; ++ unsigned int _num_reg_int_args; ++ int _stack_offset; + -+#ifdef COMPILER1 -+ void generate_c1_load_barrier_test(LIR_Assembler* ce, -+ LIR_Opr ref) const; ++ void pass_int(); ++ void pass_long(); ++ void pass_float(); ++ void pass_double(); ++ void pass_object(); + -+ void generate_c1_load_barrier_stub(LIR_Assembler* ce, -+ ZLoadBarrierStubC1* stub) const; ++ Register next_gpr(); ++ FloatRegister next_fpr(); ++ int next_stack_offset(); + -+ void generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, -+ DecoratorSet decorators) const; -+#endif // COMPILER1 ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ virtual ~SignatureHandlerGenerator() { ++ _masm = NULL; ++ } + -+#ifdef COMPILER2 -+ OptoReg::Name refine_register(const Node* node, -+ OptoReg::Name opto_reg); ++ // Code generation ++ void generate(uint64_t fingerprint); + -+ void generate_c2_load_barrier_stub(MacroAssembler* masm, -+ ZLoadBarrierStubC2* stub) const; -+#endif // COMPILER2 ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); +}; + -+#endif // CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp ++#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp new file mode 100644 -index 00000000000..d14997790af +index 0000000000..5a0c9b812f --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp -@@ -0,0 +1,212 @@ ++++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp +@@ -0,0 +1,89 @@ +/* -+ * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
++ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -18660,201 +18461,79 @@ index 00000000000..d14997790af + * + */ + -+#include "precompiled.hpp" -+#include "gc/shared/gcLogPrecious.hpp" -+#include "gc/shared/gc_globals.hpp" -+#include "gc/z/zGlobals.hpp" -+#include "runtime/globals.hpp" -+#include "runtime/os.hpp" -+#include "utilities/globalDefinitions.hpp" -+#include "utilities/powerOfTwo.hpp" ++#ifndef CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP ++#define CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP + -+#ifdef LINUX -+#include -+#endif // LINUX ++private: + -+// -+// The heap can have three different layouts, depending on the max heap size. -+// -+// Address Space & Pointer Layout 1 -+// -------------------------------- -+// -+// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) -+// . . -+// . . -+// . . -+// +--------------------------------+ 0x0000014000000000 (20TB) -+// | Remapped View | -+// +--------------------------------+ 0x0000010000000000 (16TB) -+// . . -+// +--------------------------------+ 0x00000c0000000000 (12TB) -+// | Marked1 View | -+// +--------------------------------+ 0x0000080000000000 (8TB) -+// | Marked0 View | -+// +--------------------------------+ 0x0000040000000000 (4TB) -+// . . -+// +--------------------------------+ 0x0000000000000000 -+// -+// 6 4 4 4 4 -+// 3 6 5 2 1 0 -+// +--------------------+----+-----------------------------------------------+ -+// |00000000 00000000 00|1111|11 11111111 11111111 11111111 11111111 11111111| -+// +--------------------+----+-----------------------------------------------+ -+// | | | -+// | | * 41-0 Object Offset (42-bits, 4TB address space) -+// | | -+// | * 45-42 Metadata Bits (4-bits) 0001 = Marked0 (Address view 4-8TB) -+// | 0010 = Marked1 (Address view 8-12TB) -+// | 0100 = Remapped (Address view 16-20TB) -+// | 1000 = Finalizable (Address view N/A) -+// | -+// * 63-46 Fixed (18-bits, always zero) -+// -+// -+// Address Space & Pointer Layout 2 -+// -------------------------------- -+// -+// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) -+// . . -+// . . -+// . . -+// +--------------------------------+ 0x0000280000000000 (40TB) -+// | Remapped View | -+// +--------------------------------+ 0x0000200000000000 (32TB) -+// . . -+// +--------------------------------+ 0x0000180000000000 (24TB) -+// | Marked1 View | -+// +--------------------------------+ 0x0000100000000000 (16TB) -+// | Marked0 View | -+// +--------------------------------+ 0x0000080000000000 (8TB) -+// . . 
-+// +--------------------------------+ 0x0000000000000000 -+// -+// 6 4 4 4 4 -+// 3 7 6 3 2 0 -+// +------------------+-----+------------------------------------------------+ -+// |00000000 00000000 0|1111|111 11111111 11111111 11111111 11111111 11111111| -+// +-------------------+----+------------------------------------------------+ -+// | | | -+// | | * 42-0 Object Offset (43-bits, 8TB address space) -+// | | -+// | * 46-43 Metadata Bits (4-bits) 0001 = Marked0 (Address view 8-16TB) -+// | 0010 = Marked1 (Address view 16-24TB) -+// | 0100 = Remapped (Address view 32-40TB) -+// | 1000 = Finalizable (Address view N/A) -+// | -+// * 63-47 Fixed (17-bits, always zero) -+// -+// -+// Address Space & Pointer Layout 3 -+// -------------------------------- -+// -+// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) -+// . . -+// . . -+// . . -+// +--------------------------------+ 0x0000500000000000 (80TB) -+// | Remapped View | -+// +--------------------------------+ 0x0000400000000000 (64TB) -+// . . -+// +--------------------------------+ 0x0000300000000000 (48TB) -+// | Marked1 View | -+// +--------------------------------+ 0x0000200000000000 (32TB) -+// | Marked0 View | -+// +--------------------------------+ 0x0000100000000000 (16TB) -+// . . -+// +--------------------------------+ 0x0000000000000000 -+// -+// 6 4 4 4 4 -+// 3 8 7 4 3 0 -+// +------------------+----+-------------------------------------------------+ -+// |00000000 00000000 |1111|1111 11111111 11111111 11111111 11111111 11111111| -+// +------------------+----+-------------------------------------------------+ -+// | | | -+// | | * 43-0 Object Offset (44-bits, 16TB address space) -+// | | -+// | * 47-44 Metadata Bits (4-bits) 0001 = Marked0 (Address view 16-32TB) -+// | 0010 = Marked1 (Address view 32-48TB) -+// | 0100 = Remapped (Address view 64-80TB) -+// | 1000 = Finalizable (Address view N/A) -+// | -+// * 63-48 Fixed (16-bits, always zero) -+// ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + -+// Default value if probing is not implemented for a certain platform: 128TB -+static const size_t DEFAULT_MAX_ADDRESS_BIT = 47; -+// Minimum value returned, if probing fails: 64GB -+static const size_t MINIMUM_MAX_ADDRESS_BIT = 36; -+ -+static size_t probe_valid_max_address_bit() { -+#ifdef LINUX -+ size_t max_address_bit = 0; -+ const size_t page_size = os::vm_page_size(); -+ for (size_t i = DEFAULT_MAX_ADDRESS_BIT; i > MINIMUM_MAX_ADDRESS_BIT; --i) { -+ const uintptr_t base_addr = ((uintptr_t) 1U) << i; -+ if (msync((void*)base_addr, page_size, MS_ASYNC) == 0) { -+ // msync suceeded, the address is valid, and maybe even already mapped. -+ max_address_bit = i; -+ break; -+ } -+ if (errno != ENOMEM) { -+ // Some error occured. This should never happen, but msync -+ // has some undefined behavior, hence ignore this bit. -+#ifdef ASSERT -+ fatal("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); -+#else // ASSERT -+ log_warning_p(gc)("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); -+#endif // ASSERT -+ continue; -+ } -+ // Since msync failed with ENOMEM, the page might not be mapped. -+ // Try to map it, to see if the address is valid. 
-+ void* const result_addr = mmap((void*) base_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); -+ if (result_addr != MAP_FAILED) { -+ munmap(result_addr, page_size); -+ } -+ if ((uintptr_t) result_addr == base_addr) { -+ // address is valid -+ max_address_bit = i; -+ break; -+ } ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ OrderAccess::release(); ++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; + } -+ if (max_address_bit == 0) { -+ // probing failed, allocate a very high page and take that bit as the maximum -+ const uintptr_t high_addr = ((uintptr_t) 1U) << DEFAULT_MAX_ADDRESS_BIT; -+ void* const result_addr = mmap((void*) high_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); -+ if (result_addr != MAP_FAILED) { -+ max_address_bit = BitsPerSize_t - count_leading_zeros((size_t) result_addr) - 1; -+ munmap(result_addr, page_size); ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ assert(src != NULL, "Src should not be NULL."); ++ if (_last_Java_sp != src->_last_Java_sp) { ++ _last_Java_sp = NULL; ++ OrderAccess::release(); + } ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; + } -+ log_info_p(gc, init)("Probing address space for the highest valid bit: " SIZE_FORMAT, max_address_bit); -+ return MAX2(max_address_bit, MINIMUM_MAX_ADDRESS_BIT); -+#else // LINUX -+ return DEFAULT_MAX_ADDRESS_BIT; -+#endif // LINUX -+} + -+size_t ZPlatformAddressOffsetBits() { -+ const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1; -+ const size_t max_address_offset_bits = valid_max_address_offset_bits - 3; -+ const size_t min_address_offset_bits = max_address_offset_bits - 2; -+ const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio); -+ const size_t address_offset_bits = log2i_exact(address_offset); -+ return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); -+} ++ bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; } ++ void make_walkable(JavaThread* thread); ++ void capture_last_Java_pc(void); + -+size_t ZPlatformAddressMetadataShift() { -+ return ZPlatformAddressOffsetBits(); -+} -diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ const address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void 
set_last_Java_fp(intptr_t* fp) { OrderAccess::release(); _last_Java_fp = fp; } ++ ++#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp new file mode 100644 -index 00000000000..f20ecd9b073 +index 0000000000..f6e7351c4f --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp -@@ -0,0 +1,36 @@ ++++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +@@ -0,0 +1,194 @@ +/* -+ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -18878,265 +18557,182 @@ index 00000000000..f20ecd9b073 + * + */ + -+#ifndef CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP -+#define CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP -+ -+const size_t ZPlatformGranuleSizeShift = 21; // 2MB -+const size_t ZPlatformHeapViews = 3; -+const size_t ZPlatformCacheLineSize = 64; -+ -+size_t ZPlatformAddressOffsetBits(); -+size_t ZPlatformAddressMetadataShift(); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "runtime/safepoint.hpp" + -+#endif // CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad -new file mode 100644 -index 00000000000..6b6f87814a5 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad -@@ -0,0 +1,233 @@ -+// -+// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. -+// ++#define __ masm-> + -+source_hpp %{ ++#define BUFFER_SIZE 30*wordSize + -+#include "gc/shared/gc_globals.hpp" -+#include "gc/z/c2/zBarrierSetC2.hpp" -+#include "gc/z/zThreadLocalData.hpp" ++// Instead of issuing a LoadLoad barrier we create an address ++// dependency between loads; this might be more efficient. 
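A standalone C++ sketch of the protocol that comment describes may help; it is not part of the patch, and the names safepoint_counter, slow_path and fast_get_int are assumptions made for illustration. In the generated stub the ordering guarantee comes from the hardware address dependency created by XOR-ing the object pointer with the counter twice; plain C++ gives no such guarantee, so the sketch only shows the shape of the fast path.

#include <cstdint>

// Assumed stand-ins for the real runtime state; illustrative names only.
static volatile uint32_t safepoint_counter = 0;   // even value = no safepoint in progress
static int slow_path() { return -1; }             // placeholder for the JNI slow case

// Seqlock-style fast read of an int field at 'offset' inside 'obj'.
static int fast_get_int(char* obj, intptr_t offset) {
  uint32_t c = safepoint_counter;                  // first load of the counter
  if (c & 1) {
    return slow_path();                            // odd counter: safepoint in progress
  }
  // The stub computes obj ^ c ^ c, which equals obj but makes the field address
  // depend on the counter, so the field load cannot be reordered before the
  // counter load on weakly ordered hardware.
  char* robj = (char*)((uintptr_t)obj ^ c ^ c);
  int value = *(int*)(robj + offset);              // speculative field load
  if (safepoint_counter != c) {
    return slow_path();                            // counter moved: value may be stale
  }
  return value;
}

The same re-check against the counter is what the stub performs with rcounter before moving the result into x10/f10 and returning.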
+ -+%} ++// Common register usage: ++// x10/f10: result ++// c_rarg0: jni env ++// c_rarg1: obj ++// c_rarg2: jfield id + -+source %{ ++static const Register robj = x13; ++static const Register rcounter = x14; ++static const Register roffset = x15; ++static const Register rcounter_addr = x16; ++static const Register result = x17; + -+static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, int barrier_data) { -+ if (barrier_data == ZLoadBarrierElided) { -+ return; ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ name = NULL; // unreachable + } -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data); -+ __ ld(tmp, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(tmp, tmp, ref); -+ __ bnez(tmp, *stub->entry(), true /* far */); -+ __ bind(*stub->continuation()); -+} -+ -+static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, ZLoadBarrierStrong); -+ __ j(*stub->entry()); -+ __ bind(*stub->continuation()); -+} -+ -+%} -+ -+// Load Pointer -+instruct zLoadP(iRegPNoSp dst, memory mem) -+%{ -+ match(Set dst (LoadP mem)); -+ predicate(UseZGC && (n->as_Load()->barrier_data() != 0)); -+ effect(TEMP dst); -+ -+ ins_cost(4 * DEFAULT_COST); -+ -+ format %{ "ld $dst, $mem, #@zLoadP" %} -+ -+ ins_encode %{ -+ const Address ref_addr (as_Register($mem$$base), $mem$$disp); -+ __ ld($dst$$Register, ref_addr); -+ z_load_barrier(_masm, this, ref_addr, $dst$$Register, t0 /* tmp */, barrier_data()); -+ %} -+ -+ ins_pipe(iload_reg_mem); -+%} -+ -+instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(KILL cr, TEMP_DEF res); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapP\n\t" -+ "mv $res, $res == $oldval" %} ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); + -+ ins_encode %{ -+ Label failed; -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result_as_bool */); -+ __ beqz($res$$Register, failed); -+ __ mv(t0, $oldval$$Register); -+ __ bind(failed); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ld(t1, Address(xthread, 
ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); -+ __ andr(t1, t1, t0); -+ __ beqz(t1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result_as_bool */); -+ __ bind(good); -+ } -+ %} ++ Label slow; ++ int32_t offset = 0; ++ __ la_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset); ++ __ addi(rcounter_addr, rcounter_addr, offset); + -+ ins_pipe(pipe_slow); -+%} ++ Address safepoint_counter_addr(rcounter_addr, 0); ++ __ lwu(rcounter, safepoint_counter_addr); ++ // An even value means there are no ongoing safepoint operations ++ __ andi(t0, rcounter, 1); ++ __ bnez(t0, slow); ++ __ xorr(robj, c_rarg1, rcounter); ++ __ xorr(robj, robj, rcounter); // obj, since ++ // robj ^ rcounter ^ rcounter == robj ++ // robj is address dependent on rcounter. + -+instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); -+ effect(KILL cr, TEMP_DEF res); ++ // Both robj and t0 are clobbered by try_resolve_jobject_in_native. ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ assert_cond(bs != NULL); ++ bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow); + -+ ins_cost(2 * VOLATILE_REF_COST); ++ __ srli(roffset, c_rarg2, 2); // offset + -+ format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapPAcq\n\t" -+ "mv $res, $res == $oldval" %} ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); // Used by the segfault handler ++ __ add(roffset, robj, roffset); + -+ ins_encode %{ -+ Label failed; -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result_as_bool */); -+ __ beqz($res$$Register, failed); -+ __ mv(t0, $oldval$$Register); -+ __ bind(failed); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); -+ __ andr(t1, t1, t0); -+ __ beqz(t1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result_as_bool */); -+ __ bind(good); ++ switch (type) { ++ case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break; ++ case T_BYTE: __ lb(result, Address(roffset, 0)); break; ++ case T_CHAR: __ lhu(result, Address(roffset, 0)); break; ++ case T_SHORT: __ lh(result, Address(roffset, 0)); break; ++ case T_INT: __ lw(result, Address(roffset, 0)); break; ++ case T_LONG: __ ld(result, Address(roffset, 0)); break; ++ case T_FLOAT: { ++ __ flw(f28, Address(roffset, 0)); // f28 as temporaries ++ __ fmv_x_w(result, f28); // f{31--0}-->x ++ break; + } -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP 
oldval, iRegP newval) %{ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -+ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP_DEF res); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangeP" %} -+ -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(t0, t0, $res$$Register); -+ __ beqz(t0, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); -+ __ bind(good); ++ case T_DOUBLE: { ++ __ fld(f28, Address(roffset, 0)); // f28 as temporaries ++ __ fmv_x_d(result, f28); // d{63--0}-->x ++ break; + } -+ %} -+ -+ ins_pipe(pipe_slow); -+%} ++ default: ShouldNotReachHere(); ++ } + -+instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -+ predicate(UseZGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP_DEF res); ++ __ xorr(rcounter_addr, rcounter_addr, result); ++ __ xorr(rcounter_addr, rcounter_addr, result); ++ __ lw(t0, safepoint_counter_addr); ++ __ bne(rcounter, t0, slow); + -+ ins_cost(2 * VOLATILE_REF_COST); ++ switch (type) { ++ case T_FLOAT: __ fmv_w_x(f10, result); break; ++ case T_DOUBLE: __ fmv_d_x(f10, result); break; ++ default: __ mv(x10, result); break; ++ } ++ __ ret(); + -+ format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangePAcq" %} ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind(slow); ++ address slow_case_addr; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ slow_case_addr = NULL; // unreachable ++ } + -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(t0, t0, $res$$Register); -+ __ beqz(t0, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); -+ __ 
bind(good); -+ } -+ %} ++ { ++ __ enter(); ++ int32_t tmp_offset = 0; ++ __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset); ++ __ jalr(x1, t0, tmp_offset); ++ __ leave(); ++ __ ret(); ++ } ++ __ flush(); + -+ ins_pipe(pipe_slow); -+%} ++ return fast_entry; ++} + -+instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -+ match(Set prev (GetAndSetP mem newv)); -+ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); -+ effect(TEMP_DEF prev, KILL cr); + -+ ins_cost(2 * VOLATILE_REF_COST); ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} + -+ format %{ "atomic_xchg $prev, $newv, [$mem], #@zGetAndSetP" %} ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} + -+ ins_encode %{ -+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); -+ %} ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} + -+ ins_pipe(pipe_serial); -+%} ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} + -+instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -+ match(Set prev (GetAndSetP mem newv)); -+ predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() != 0)); -+ effect(TEMP_DEF prev, KILL cr); ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} + -+ ins_cost(VOLATILE_REF_COST); ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} + -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem], #@zGetAndSetPAcq" %} ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} + -+ ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); -+ %} -+ ins_pipe(pipe_serial); -+%} -diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp new file mode 100644 -index 00000000000..2936837d951 +index 0000000000..df3c0267ee --- /dev/null -+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -0,0 +1,52 @@ ++++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +@@ -0,0 +1,106 @@ +/* -+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -19160,40 +18756,96 @@ index 00000000000..2936837d951 + * + */ + -+#ifndef CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP -+#define CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP ++#ifndef CPU_RISCV_JNITYPES_RISCV_HPP ++#define CPU_RISCV_JNITYPES_RISCV_HPP + -+const int StackAlignmentInBytes = 16; ++#include "jni.h" ++#include "oops/oop.hpp" + -+// Indicates whether the C calling conventions require that -+// 32-bit integer argument values are extended to 64 bits. -+const bool CCallingConventionRequiresIntsAsLongs = false; ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call + -+// RISCV has adopted a multicopy atomic model closely following -+// that of ARMv8. -+#define CPU_MULTI_COPY_ATOMIC ++class JNITypes : private AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls:calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). + -+// To be safe, we deoptimize when we come across an access that needs -+// patching. This is similar to what is done on aarch64. -+#define DEOPTIMIZE_WHEN_PATCHING ++public: ++ // Ints are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } + -+#define SUPPORTS_NATIVE_CX8 ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to+1). ++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ } + -+#define SUPPORT_RESERVED_STACK_AREA ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ pos += 2; ++ } + -+#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ pos += 2; ++ } + -+#define USE_POINTERS_TO_REGISTER_IMPL_ARRAY ++ // Oops are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + -+#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++ ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 1 ++ // Doubles are stored in native word format in one JavaCallArgument ++ // slot at *(to+1). 
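The slot convention being described: a jint, jfloat or oop occupies one JavaCallArgument slot at *to, while a jlong or jdouble takes two slots with its payload written to the second one (to + 1 + pos, hence _JNI_SLOT_OFFSET == 1), which is what lets the call stub's reversal of the argument list produce the right ordering. The put_double overloads that follow use exactly the same pattern as put_long above. A minimal standalone sketch, using plain int32_t/int64_t as stand-ins for jint/jlong (illustrative only, not part of the header):

#include <cstdint>
#include <cstdio>

// Mirrors the put_int / put_long slot layout of this header with plain types.
static void put_int(int32_t from, intptr_t* to, int& pos)  { *(int32_t*)(to + pos++) = from; }
static void put_long(int64_t from, intptr_t* to, int& pos) { *(int64_t*)(to + 1 + pos) = from; pos += 2; }

int main() {
  intptr_t slots[4] = {0, 0, 0, 0};
  int pos = 0;
  put_int(7, slots, pos);                       // payload in slots[0], pos becomes 1
  put_long(0x1122334455667788LL, slots, pos);   // payload in slots[2], slots[1] unused, pos becomes 3
  std::printf("pos=%d slots[0]=%ld slots[2]=%lx\n",
              pos, (long)slots[0], (unsigned long)slots[2]);
  return 0;
}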
++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ } ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ // No need to worry about alignment on Intel. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_RISCV_JNITYPES_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp new file mode 100644 -index 00000000000..cbfc0583883 +index 0000000000..e18bd3d8e2 --- /dev/null -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -0,0 +1,99 @@ ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -0,0 +1,5410 @@ +/* -+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -19217,7334 +18869,6924 @@ index 00000000000..cbfc0583883 + * + */ + -+#ifndef CPU_RISCV_GLOBALS_RISCV_HPP -+#define CPU_RISCV_GLOBALS_RISCV_HPP -+ -+#include "utilities/globalDefinitions.hpp" -+#include "utilities/macros.hpp" -+ -+// Sets the default values for platform dependent flags used by the runtime system. -+// (see globals.hpp) -+ -+define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks -+define_pd_global(bool, TrapBasedNullChecks, false); -+define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast -+ -+define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. -+define_pd_global(intx, CodeEntryAlignment, 64); -+define_pd_global(intx, OptoLoopAlignment, 16); -+ -+#define DEFAULT_STACK_YELLOW_PAGES (2) -+#define DEFAULT_STACK_RED_PAGES (1) -+// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the -+// stack if compiled for unix and LP64. To pass stack overflow tests we need -+// 20 shadow pages. 
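Rough arithmetic behind that requirement, assuming the common 4 KiB base page size (an assumption for illustration; the page size is not fixed by this file): 20 shadow pages give 80 KiB of shadow zone, comfortably more than the 64 KiB buffer socketWrite0 places on the stack. A throwaway compile-time check of the same sum:

#include <cstddef>

// Illustrative only: the 4 KiB page size is an assumption, not taken from the patch.
constexpr std::size_t kAssumedPageSize   = 4 * 1024;
constexpr std::size_t kShadowPages       = 20;        // DEFAULT_STACK_SHADOW_PAGES just below
constexpr std::size_t kSocketWriteBuffer = 64 * 1024; // buffer mentioned in the comment above

static_assert(kShadowPages * kAssumedPageSize > kSocketWriteBuffer,
              "shadow zone should exceed the JNI socket-write stack buffer");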
-+#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+5)) -+#define DEFAULT_STACK_RESERVED_PAGES (1) -+ -+#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES -+#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES -+#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES -+#define MIN_STACK_RESERVED_PAGES (0) ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "compiler/disassembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/accessDecorators.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/klass.inline.hpp" ++#include "oops/oop.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/jniHandles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.hpp" ++#ifdef COMPILER2 ++#include "opto/compile.hpp" ++#include "opto/intrinsicnode.hpp" ++#include "opto/node.hpp" ++#include "opto/output.hpp" ++#endif + -+define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); -+define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); -+define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); -+define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#endif ++#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") + -+define_pd_global(bool, RewriteBytecodes, true); -+define_pd_global(bool, RewriteFrequentPairs, true); ++static void pass_arg0(MacroAssembler* masm, Register arg) { ++ if (c_rarg0 != arg) { ++ assert_cond(masm != NULL); ++ masm->mv(c_rarg0, arg); ++ } ++} + -+define_pd_global(bool, PreserveFramePointer, false); ++static void pass_arg1(MacroAssembler* masm, Register arg) { ++ if (c_rarg1 != arg) { ++ assert_cond(masm != NULL); ++ masm->mv(c_rarg1, arg); ++ } ++} + -+define_pd_global(uintx, TypeProfileLevel, 111); ++static void pass_arg2(MacroAssembler* masm, Register arg) { ++ if (c_rarg2 != arg) { ++ assert_cond(masm != NULL); ++ masm->mv(c_rarg2, arg); ++ } ++} + -+define_pd_global(bool, CompactStrings, true); ++static void pass_arg3(MacroAssembler* masm, Register arg) { ++ if (c_rarg3 != arg) { ++ assert_cond(masm != NULL); ++ masm->mv(c_rarg3, arg); ++ } ++} + -+// Clear short arrays bigger than one word in an arch-specific way -+define_pd_global(intx, InitArrayShortSize, BytesPerLong); ++void MacroAssembler::align(int modulus, int extra_offset) { ++ CompressibleRegion cr(this); ++ while ((offset() + extra_offset) % modulus != 0) { nop(); } ++} + -+define_pd_global(intx, InlineSmallCode, 1000); ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); ++} + -+#define ARCH_FLAGS(develop, \ -+ product, \ -+ notproduct, \ -+ range, \ -+ constraint) \ -+ \ -+ product(bool, NearCpool, true, \ -+ "constant pool is close to instructions") \ -+ product(intx, BlockZeroingLowLimit, 256, \ -+ "Minimum size in bytes when block zeroing will be 
used") \ -+ range(1, max_jint) \ -+ product(bool, TraceTraps, false, "Trace all traps the signal handler") \ -+ /* For now we're going to be safe and add the I/O bits to userspace fences. */ \ -+ product(bool, UseConservativeFence, true, \ -+ "Extend i for r and o for w in the pred/succ flags of fence;" \ -+ "Extend fence.i to fence.i + fence.") \ -+ product(bool, AvoidUnalignedAccesses, true, \ -+ "Avoid generating unaligned memory accesses") \ -+ product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \ -+ product(bool, UseRVB, false, EXPERIMENTAL, "Use RVB instructions") \ -+ product(bool, UseRVC, false, EXPERIMENTAL, "Use RVC instructions") \ -+ product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \ -+ "Use RVV instructions for left/right shift of BigInteger") ++// Implementation of call_VM versions + -+#endif // CPU_RISCV_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp -new file mode 100644 -index 00000000000..cc93103dc55 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp -@@ -0,0 +1,79 @@ -+/* -+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "code/icBuffer.hpp" -+#include "gc/shared/collectedHeap.inline.hpp" -+#include "interpreter/bytecodes.hpp" -+#include "memory/resourceArea.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/oop.inline.hpp" ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ pass_arg1(this, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} + -+int InlineCacheBuffer::ic_stub_code_size() { -+ // 6: auipc + ld + auipc + jalr + address(2 * instruction_size) -+ // 5: auipc + ld + j + address(2 * instruction_size) -+ return (MacroAssembler::far_branches() ? 
6 : 5) * NativeInstruction::instruction_size; ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ pass_arg1(this, arg_1); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); +} + -+#define __ masm-> ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ assert(arg_1 != c_rarg3, "smashed arg"); ++ assert(arg_2 != c_rarg3, "smashed arg"); ++ pass_arg3(this, arg_3); + -+void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { -+ assert_cond(code_begin != NULL && entry_point != NULL); -+ ResourceMark rm; -+ CodeBuffer code(code_begin, ic_stub_code_size()); -+ MacroAssembler* masm = new MacroAssembler(&code); -+ // Note: even though the code contains an embedded value, we do not need reloc info -+ // because -+ // (1) the value is old (i.e., doesn't matter for scavenges) -+ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); + -+ address start = __ pc(); -+ Label l; -+ __ ld(t1, l); -+ __ far_jump(ExternalAddress(entry_point)); -+ __ align(wordSize); -+ __ bind(l); -+ __ emit_int64((intptr_t)cached_value); -+ // Only need to invalidate the 1st two instructions - not the whole ic stub -+ ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size()); -+ assert(__ pc() - start == ic_stub_code_size(), "must be"); ++ pass_arg1(this, arg_1); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); +} + -+address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { -+ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object -+ NativeJump* jump = nativeJump_at(move->next_instruction_address()); -+ return jump->jump_destination(); ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); +} + -+ -+void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { -+ // The word containing the cached value is at the end of this IC buffer -+ uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize); -+ void* o = (void*)*p; -+ return o; ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ pass_arg1(this, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); +} -diff --git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp -new file mode 100644 -index 00000000000..922a80f9f3e ---- /dev/null -+++ b/src/hotspot/cpu/riscv/icache_riscv.cpp -@@ -0,0 +1,51 @@ -+/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "runtime/icache.hpp" + -+#define __ _masm-> ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { + -+static int icache_flush(address addr, int lines, int magic) { -+ os::icache_flush((long int) addr, (long int) (addr + (lines << ICache::log2_line_size))); -+ return magic; ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ pass_arg1(this, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); +} + -+void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { -+ address start = (address)icache_flush; -+ *flush_icache_stub = (ICache::flush_icache_stub_t)start; -+ -+ // ICache::invalidate_range() contains explicit condition that the first -+ // call is invoked on the generated icache flush stub code range. -+ ICache::invalidate_range(start, 0); -+ -+ { -+ StubCodeMark mark(this, "ICache", "fake_stub_for_inlined_icache_flush"); -+ __ ret(); -+ } ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ assert(arg_1 != c_rarg3, "smashed arg"); ++ assert(arg_2 != c_rarg3, "smashed arg"); ++ pass_arg3(this, arg_3); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ pass_arg1(this, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); +} + -+#undef __ -diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp -new file mode 100644 -index 00000000000..5bf40ca8204 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/icache_riscv.hpp -@@ -0,0 +1,42 @@ -+/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). 
-+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++// these are no-ops overridden by InterpreterMacroAssembler ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} ++void MacroAssembler::check_and_handle_popframe(Register java_thread) {} + -+#ifndef CPU_RISCV_ICACHE_RISCV_HPP -+#define CPU_RISCV_ICACHE_RISCV_HPP ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); + -+// Interface for updating the instruction cache. Whenever the VM -+// modifies code, part of the processor instruction cache potentially -+// has to be flushed. ++ // load indirectly to solve generation ordering problem ++ ld(tmp, ExternalAddress((address) delayed_value_addr)); + -+class ICache : public AbstractICache { -+public: -+ enum { -+ stub_size = 16, // Size of the icache flush stub in bytes -+ line_size = BytesPerWord, // conservative -+ log2_line_size = LogBytesPerWord // log2(line_size) -+ }; -+}; ++ if (offset != 0) ++ add(tmp, tmp, offset); + -+#endif // CPU_RISCV_ICACHE_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -new file mode 100644 -index 00000000000..d12dcb2af19 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -0,0 +1,1940 @@ -+/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ return RegisterOrConstant(tmp); ++} + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "interp_masm_riscv.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "logging/log.hpp" -+#include "oops/arrayOop.hpp" -+#include "oops/markWord.hpp" -+#include "oops/method.hpp" -+#include "oops/methodData.hpp" -+#include "prims/jvmtiExport.hpp" -+#include "prims/jvmtiThreadState.hpp" -+#include "runtime/basicLock.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/safepointMechanism.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/thread.inline.hpp" -+#include "utilities/powerOfTwo.hpp" ++// Calls to C land ++// ++// When entering C land, the fp, & esp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Register last_java_pc, ++ Register tmp) { + -+void InterpreterMacroAssembler::narrow(Register result) { -+ // Get method->_constMethod->_result_type -+ ld(t0, Address(fp, frame::interpreter_frame_method_offset * wordSize)); -+ ld(t0, Address(t0, Method::const_offset())); -+ lbu(t0, Address(t0, ConstMethod::result_type_offset())); ++ if (last_java_pc->is_valid()) { ++ sd(last_java_pc, Address(xthread, ++ JavaThread::frame_anchor_offset() + ++ JavaFrameAnchor::last_Java_pc_offset())); ++ } + -+ Label done, notBool, notByte, notChar; ++ // determine last_java_sp register ++ if (last_java_sp == sp) { ++ mv(tmp, sp); ++ last_java_sp = tmp; ++ } else if (!last_java_sp->is_valid()) { ++ last_java_sp = esp; ++ } + -+ // common case first -+ mv(t1, T_INT); -+ beq(t0, t1, done); ++ sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); + -+ // mask integer result to narrower return type. -+ mv(t1, T_BOOLEAN); -+ bne(t0, t1, notBool); ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); ++ } ++} + -+ andi(result, result, 0x1); -+ j(done); ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc, ++ Register tmp) { ++ assert(last_java_pc != NULL, "must provide a valid PC"); + -+ bind(notBool); -+ mv(t1, T_BYTE); -+ bne(t0, t1, notByte); -+ sign_extend(result, result, 8); -+ j(done); ++ la(tmp, last_java_pc); ++ sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + -+ bind(notByte); -+ mv(t1, T_CHAR); -+ bne(t0, t1, notChar); -+ zero_extend(result, result, 16); -+ j(done); ++ set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); ++} + -+ bind(notChar); -+ sign_extend(result, result, 16); ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label &L, ++ Register tmp) { ++ if (L.is_bound()) { ++ set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); ++ } ++} + -+ // Nothing to do for T_INT -+ bind(done); -+ addw(result, result, zr); ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. 
++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. ++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ membar(MacroAssembler::AnyAny); ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ andi(t0, t1, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path); ++ } else { ++ safepoint_poll(slow_path); ++ } +} + -+void InterpreterMacroAssembler::jump_to_entry(address entry) { -+ assert(entry != NULL, "Entry must have been generated by now"); -+ j(entry); ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ // we must set sp to zero to clear frame ++ sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); ++ ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); +} + -+void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { -+ if (JvmtiExport::can_pop_frame()) { -+ Label L; -+ // Initiate popframe handling only if it is not already being -+ // processed. If the flag has the popframe_processing bit set, -+ // it means that this code is called *during* popframe handling - we -+ // don't want to reenter. -+ // This method is only called just after the call into the vm in -+ // call_VM_base, so the arg registers are available. -+ lwu(t1, Address(xthread, JavaThread::popframe_condition_offset())); -+ andi(t0, t1, JavaThread::popframe_pending_bit); -+ beqz(t0, L); -+ andi(t0, t1, JavaThread::popframe_processing_bit); -+ bnez(t0, L); -+ // Call Interpreter::remove_activation_preserving_args_entry() to get the -+ // address of the same-named entrypoint in the generated interpreter code. 
-+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); -+ jr(x10); -+ bind(L); ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++ java_thread = xthread; ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = esp; + } -+} + ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(java_thread == xthread, "unexpected register"); + -+void InterpreterMacroAssembler::load_earlyret_value(TosState state) { -+ ld(x12, Address(xthread, JavaThread::jvmti_thread_state_offset())); -+ const Address tos_addr(x12, JvmtiThreadState::earlyret_tos_offset()); -+ const Address oop_addr(x12, JvmtiThreadState::earlyret_oop_offset()); -+ const Address val_addr(x12, JvmtiThreadState::earlyret_value_offset()); -+ switch (state) { -+ case atos: -+ ld(x10, oop_addr); -+ sd(zr, oop_addr); -+ verify_oop(x10); -+ break; -+ case ltos: -+ ld(x10, val_addr); -+ break; -+ case btos: // fall through -+ case ztos: // fall through -+ case ctos: // fall through -+ case stos: // fall through -+ case itos: -+ lwu(x10, val_addr); -+ break; -+ case ftos: -+ flw(f10, val_addr); -+ break; -+ case dtos: -+ fld(f10, val_addr); -+ break; -+ case vtos: -+ /* nothing to do */ -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ // Clean up tos value in the thread object -+ mvw(t0, (int) ilgl); -+ sw(t0, tos_addr); -+ sw(zr, val_addr); -+} ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); + ++ // push java thread (becomes first argument of C function) ++ mv(c_rarg0, java_thread); + -+void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { -+ if (JvmtiExport::can_force_early_return()) { -+ Label L; -+ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); -+ beqz(t0, L); // if [thread->jvmti_thread_state() == NULL] then exit ++ // set last Java frame before call ++ assert(last_java_sp != fp, "can't use fp"); + -+ // Initiate earlyret handling only if it is not already being processed. -+ // If the flag has the earlyret_processing bit set, it means that this code -+ // is called *during* earlyret handling - we don't want to reenter. -+ lwu(t0, Address(t0, JvmtiThreadState::earlyret_state_offset())); -+ mv(t1, JvmtiThreadState::earlyret_pending); -+ bne(t0, t1, L); ++ Label l; ++ set_last_Java_frame(last_java_sp, fp, l, t0); + -+ // Call Interpreter::remove_activation_early_entry() to get the address of the -+ // same-named entrypoint in the generated interpreter code. 
-+ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); -+ lwu(t0, Address(t0, JvmtiThreadState::earlyret_tos_offset())); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), t0); -+ jr(x10); -+ bind(L); ++ // do the call, remove parameters ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); ++ ++ // reset last Java frame ++ // Only interpreter should have to clear fp ++ reset_last_Java_frame(true); ++ ++ // C++ interp handles this in the interpreter ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); ++ Label ok; ++ beqz(t0, ok); ++ int32_t offset = 0; ++ la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset); ++ jalr(x0, t0, offset); ++ bind(ok); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ get_vm_result(oop_result, java_thread); + } +} + -+void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) { -+ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); -+ lhu(reg, Address(xbcp, bcp_offset)); -+ revb_h(reg, reg); ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ sd(zr, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); +} + -+void InterpreterMacroAssembler::get_dispatch() { -+ int32_t offset = 0; -+ la_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset); -+ addi(xdispatch, xdispatch, offset); ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); +} + -+void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, -+ int bcp_offset, -+ size_t index_size) { -+ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); -+ if (index_size == sizeof(u2)) { -+ load_unsigned_short(index, Address(xbcp, bcp_offset)); -+ } else if (index_size == sizeof(u4)) { -+ lwu(index, Address(xbcp, bcp_offset)); -+ // Check if the secondary index definition is still ~x, otherwise -+ // we have to change the following assembler code to calculate the -+ // plain index. -+ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); -+ xori(index, index, -1); -+ addw(index, index, zr); -+ } else if (index_size == sizeof(u1)) { -+ load_unsigned_byte(index, Address(xbcp, bcp_offset)); -+ } else { -+ ShouldNotReachHere(); ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) { return; } ++ ++ // Pass register number to verify_oop_subroutine ++ const char* b = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); + } -+} ++ BLOCK_COMMENT("verify_oop {"); + -+// Return -+// Rindex: index into constant pool -+// Rcache: address of cache entry - ConstantPoolCache::base_offset() -+// -+// A caller must add ConstantPoolCache::base_offset() to Rcache to get -+// the true address of the cache entry. 
-+// -+void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, -+ Register index, -+ int bcp_offset, -+ size_t index_size) { -+ assert_different_registers(cache, index); -+ assert_different_registers(cache, xcpool); -+ get_cache_index_at_bcp(index, bcp_offset, index_size); -+ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); -+ // Convert from field index to ConstantPoolCacheEntry -+ // riscv already has the cache in xcpool so there is no need to -+ // install it in cache. Instead we pre-add the indexed offset to -+ // xcpool and return it in cache. All clients of this method need to -+ // be modified accordingly. -+ shadd(cache, index, xcpool, cache, 5); -+} ++ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ ++ mv(c_rarg0, reg); // c_rarg0 : x10 ++ // The length of the instruction sequence emitted should be independent ++ // of the values of the local char buffer address so that the size of mach ++ // nodes for scratch emit and normal emit matches. ++ mv(t0, (address)b); + ++ // call indirectly to solve generation ordering problem ++ int32_t offset = 0; ++ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); ++ ld(t1, Address(t1, offset)); ++ jalr(t1); + -+void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, -+ Register index, -+ Register bytecode, -+ int byte_no, -+ int bcp_offset, -+ size_t index_size) { -+ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); -+ // We use a 32-bit load here since the layout of 64-bit words on -+ // little-endian machines allow us that. -+ // n.b. unlike x86 cache already includes the index offset -+ la(bytecode, Address(cache, -+ ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::indices_offset())); -+ membar(MacroAssembler::AnyAny); -+ lwu(bytecode, bytecode); -+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ const int shift_count = (1 + byte_no) * BitsPerByte; -+ slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte)); -+ srli(bytecode, bytecode, XLEN - BitsPerByte); -+} ++ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + -+void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, -+ Register tmp, -+ int bcp_offset, -+ size_t index_size) { -+ assert(cache != tmp, "must use different register"); -+ get_cache_index_at_bcp(tmp, bcp_offset, index_size); -+ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); -+ // Convert from field index to ConstantPoolCacheEntry index -+ // and from word offset to byte offset -+ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, -+ "else change next line"); -+ ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); -+ // skip past the header -+ add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); -+ // construct pointer to cache entry -+ shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord); ++ BLOCK_COMMENT("} verify_oop"); +} + -+// Load object from cpool->resolved_references(index) -+void InterpreterMacroAssembler::load_resolved_reference_at_index( -+ Register result, Register index, Register tmp) { -+ assert_different_registers(result, index); ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) { ++ return; ++ } + -+ get_constant_pool(result); -+ // Load pointer for resolved_references[] objArray -+ ld(result, Address(result, ConstantPool::cache_offset_in_bytes())); -+ ld(result, Address(result, 
ConstantPoolCache::resolved_references_offset_in_bytes())); -+ resolve_oop_handle(result, tmp); -+ // Add in the index -+ addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); -+ shadd(result, index, result, index, LogBytesPerHeapOop); -+ load_heap_oop(result, Address(result, 0)); ++ const char* b = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ } ++ BLOCK_COMMENT("verify_oop_addr {"); ++ ++ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ ++ if (addr.uses(sp)) { ++ la(x10, addr); ++ ld(x10, Address(x10, 4 * wordSize)); ++ } else { ++ ld(x10, addr); ++ } ++ ++ // The length of the instruction sequence emitted should be independent ++ // of the values of the local char buffer address so that the size of mach ++ // nodes for scratch emit and normal emit matches. ++ mv(t0, (address)b); ++ ++ // call indirectly to solve generation ordering problem ++ int32_t offset = 0; ++ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); ++ ld(t1, Address(t1, offset)); ++ jalr(t1); ++ ++ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ ++ BLOCK_COMMENT("} verify_oop_addr"); +} + -+void InterpreterMacroAssembler::load_resolved_klass_at_offset( -+ Register cpool, Register index, Register klass, Register temp) { -+ shadd(temp, index, cpool, temp, LogBytesPerWord); -+ lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index -+ ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses -+ shadd(klass, temp, klass, temp, LogBytesPerWord); -+ ld(klass, Address(klass, Array::base_offset_in_bytes())); ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). ++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ if (arg_slot.is_constant()) { ++ return Address(esp, arg_slot.as_constant() * stackElementSize + offset); ++ } else { ++ assert_different_registers(t0, arg_slot.as_register()); ++ shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize)); ++ return Address(t0, offset); ++ } +} + -+void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, -+ Register method, -+ Register cache) { -+ const int method_offset = in_bytes( -+ ConstantPoolCache::base_offset() + -+ ((byte_no == TemplateTable::f2_byte) -+ ? 
ConstantPoolCacheEntry::f2_offset() -+ : ConstantPoolCacheEntry::f1_offset())); ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif + -+ ld(method, Address(cache, method_offset)); // get f1 Method* ++void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) ++{ ++ // In order to get locks to work, we need to fake a in_VM state ++ if (ShowMessageBoxOnError) { ++ JavaThread* thread = JavaThread::current(); ++ JavaThreadState saved_state = thread->thread_state(); ++ thread->set_thread_state(_thread_in_vm); ++#ifndef PRODUCT ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ ttyLocker ttyl; ++ BytecodeCounter::print(); ++ } ++#endif ++ if (os::message_box(msg, "Execution stopped, print registers?")) { ++ ttyLocker ttyl; ++ tty->print_cr(" pc = 0x%016lx", pc); ++#ifndef PRODUCT ++ tty->cr(); ++ findpc(pc); ++ tty->cr(); ++#endif ++ tty->print_cr(" x0 = 0x%016lx", regs[0]); ++ tty->print_cr(" x1 = 0x%016lx", regs[1]); ++ tty->print_cr(" x2 = 0x%016lx", regs[2]); ++ tty->print_cr(" x3 = 0x%016lx", regs[3]); ++ tty->print_cr(" x4 = 0x%016lx", regs[4]); ++ tty->print_cr(" x5 = 0x%016lx", regs[5]); ++ tty->print_cr(" x6 = 0x%016lx", regs[6]); ++ tty->print_cr(" x7 = 0x%016lx", regs[7]); ++ tty->print_cr(" x8 = 0x%016lx", regs[8]); ++ tty->print_cr(" x9 = 0x%016lx", regs[9]); ++ tty->print_cr("x10 = 0x%016lx", regs[10]); ++ tty->print_cr("x11 = 0x%016lx", regs[11]); ++ tty->print_cr("x12 = 0x%016lx", regs[12]); ++ tty->print_cr("x13 = 0x%016lx", regs[13]); ++ tty->print_cr("x14 = 0x%016lx", regs[14]); ++ tty->print_cr("x15 = 0x%016lx", regs[15]); ++ tty->print_cr("x16 = 0x%016lx", regs[16]); ++ tty->print_cr("x17 = 0x%016lx", regs[17]); ++ tty->print_cr("x18 = 0x%016lx", regs[18]); ++ tty->print_cr("x19 = 0x%016lx", regs[19]); ++ tty->print_cr("x20 = 0x%016lx", regs[20]); ++ tty->print_cr("x21 = 0x%016lx", regs[21]); ++ tty->print_cr("x22 = 0x%016lx", regs[22]); ++ tty->print_cr("x23 = 0x%016lx", regs[23]); ++ tty->print_cr("x24 = 0x%016lx", regs[24]); ++ tty->print_cr("x25 = 0x%016lx", regs[25]); ++ tty->print_cr("x26 = 0x%016lx", regs[26]); ++ tty->print_cr("x27 = 0x%016lx", regs[27]); ++ tty->print_cr("x28 = 0x%016lx", regs[28]); ++ tty->print_cr("x30 = 0x%016lx", regs[30]); ++ tty->print_cr("x31 = 0x%016lx", regs[31]); ++ BREAKPOINT; ++ } ++ } ++ fatal("DEBUG MESSAGE: %s", msg); +} + -+// Generate a subtype check: branch to ok_is_subtype if sub_klass is a -+// subtype of super_klass. -+// -+// Args: -+// x10: superklass -+// Rsub_klass: subklass -+// -+// Kills: -+// x12, x15 -+void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, -+ Label& ok_is_subtype) { -+ assert(Rsub_klass != x10, "x10 holds superklass"); -+ assert(Rsub_klass != x12, "x12 holds 2ndary super array length"); -+ assert(Rsub_klass != x15, "x15 holds 2ndary super array scan ptr"); ++void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) { ++ Label done, not_weak; ++ beqz(value, done); // Use NULL as-is. + -+ // Profile the not-null value's klass. -+ profile_typecheck(x12, Rsub_klass, x15); // blows x12, reloads x15 ++ // Test for jweak tag. ++ andi(t0, value, JNIHandles::weak_tag_mask); ++ beqz(t0, not_weak); + -+ // Do the check. -+ check_klass_subtype(Rsub_klass, x10, x12, ok_is_subtype); // blows x12 ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, ++ Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ j(done); + -+ // Profile the failure of the check. 
-+ profile_typecheck_failed(x12); // blows x12 ++ bind(not_weak); ++ // Resolve (untagged) jobject. ++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); +} + -+// Java Expression Stack ++void MacroAssembler::stop(const char* msg) { ++ address ip = pc(); ++ pusha(); ++ // The length of the instruction sequence emitted should be independent ++ // of the values of msg and ip so that the size of mach nodes for scratch ++ // emit and normal emit matches. ++ mv(c_rarg0, (address)msg); ++ mv(c_rarg1, (address)ip); ++ mv(c_rarg2, sp); ++ mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); ++ jalr(c_rarg3); ++ ebreak(); ++} + -+void InterpreterMacroAssembler::pop_ptr(Register r) { -+ ld(r, Address(esp, 0)); -+ addi(esp, esp, wordSize); ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); +} + -+void InterpreterMacroAssembler::pop_i(Register r) { -+ lw(r, Address(esp, 0)); // lw do signed extended -+ addi(esp, esp, wordSize); ++void MacroAssembler::emit_static_call_stub() { ++ // CompiledDirectStaticCall::set_to_interpreted knows the ++ // exact layout of this stub. ++ ++ mov_metadata(xmethod, (Metadata*)NULL); ++ ++ // Jump to the entry point of the i2c stub. ++ int32_t offset = 0; ++ movptr_with_offset(t0, 0, offset); ++ jalr(x0, t0, offset); +} + -+void InterpreterMacroAssembler::pop_l(Register r) { -+ ld(r, Address(esp, 0)); -+ addi(esp, esp, 2 * Interpreter::stackElementSize); ++void MacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments, ++ Label *retaddr) { ++ int32_t offset = 0; ++ push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp ++ movptr_with_offset(t0, entry_point, offset); ++ jalr(x1, t0, offset); ++ if (retaddr != NULL) { ++ bind(*retaddr); ++ } ++ pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp +} + -+void InterpreterMacroAssembler::push_ptr(Register r) { -+ addi(esp, esp, -wordSize); -+ sd(r, Address(esp, 0)); ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); +} + -+void InterpreterMacroAssembler::push_i(Register r) { -+ addi(esp, esp, -wordSize); -+ addw(r, r, zr); // signed extended -+ sd(r, Address(esp, 0)); ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ pass_arg0(this, arg_0); ++ call_VM_leaf_base(entry_point, 1); +} + -+void InterpreterMacroAssembler::push_l(Register r) { -+ addi(esp, esp, -2 * wordSize); -+ sd(zr, Address(esp, wordSize)); -+ sd(r, Address(esp)); ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ pass_arg0(this, arg_0); ++ pass_arg1(this, arg_1); ++ call_VM_leaf_base(entry_point, 2); +} + -+void InterpreterMacroAssembler::pop_f(FloatRegister r) { -+ flw(r, esp, 0); -+ addi(esp, esp, wordSize); ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, ++ Register arg_1, Register arg_2) { ++ pass_arg0(this, arg_0); ++ pass_arg1(this, arg_1); ++ pass_arg2(this, arg_2); ++ call_VM_leaf_base(entry_point, 3); +} + -+void InterpreterMacroAssembler::pop_d(FloatRegister r) { -+ fld(r, esp, 0); -+ addi(esp, esp, 2 * Interpreter::stackElementSize); ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { ++ pass_arg0(this, arg_0); ++ 
MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + -+void InterpreterMacroAssembler::push_f(FloatRegister r) { -+ addi(esp, esp, -wordSize); -+ fsw(r, Address(esp, 0)); ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); +} + -+void InterpreterMacroAssembler::push_d(FloatRegister r) { -+ addi(esp, esp, -2 * wordSize); -+ fsd(r, Address(esp, 0)); ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ assert(arg_0 != c_rarg2, "smashed arg"); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); +} + -+void InterpreterMacroAssembler::pop(TosState state) { -+ switch (state) { -+ case atos: -+ pop_ptr(); -+ verify_oop(x10); -+ break; -+ case btos: // fall through -+ case ztos: // fall through -+ case ctos: // fall through -+ case stos: // fall through -+ case itos: -+ pop_i(); -+ break; -+ case ltos: -+ pop_l(); -+ break; -+ case ftos: -+ pop_f(); -+ break; -+ case dtos: -+ pop_d(); -+ break; -+ case vtos: -+ /* nothing to do */ -+ break; -+ default: -+ ShouldNotReachHere(); -+ } ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { ++ assert(arg_0 != c_rarg3, "smashed arg"); ++ assert(arg_1 != c_rarg3, "smashed arg"); ++ assert(arg_2 != c_rarg3, "smashed arg"); ++ pass_arg3(this, arg_3); ++ assert(arg_0 != c_rarg2, "smashed arg"); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 4); +} + -+void InterpreterMacroAssembler::push(TosState state) { -+ switch (state) { -+ case atos: -+ verify_oop(x10); -+ push_ptr(); -+ break; -+ case btos: // fall through -+ case ztos: // fall through -+ case ctos: // fall through -+ case stos: // fall through -+ case itos: -+ push_i(); -+ break; -+ case ltos: -+ push_l(); -+ break; -+ case ftos: -+ push_f(); -+ break; -+ case dtos: -+ push_d(); -+ break; -+ case vtos: -+ /* nothing to do */ -+ break; -+ default: -+ ShouldNotReachHere(); ++void MacroAssembler::nop() { ++ addi(x0, x0, 0); ++} ++ ++void MacroAssembler::mv(Register Rd, Register Rs) { ++ if (Rd != Rs) { ++ addi(Rd, Rs, 0); + } +} + -+// Helpers for swap and dup -+void InterpreterMacroAssembler::load_ptr(int n, Register val) { -+ ld(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); ++void MacroAssembler::notr(Register Rd, Register Rs) { ++ xori(Rd, Rs, -1); +} + -+void InterpreterMacroAssembler::store_ptr(int n, Register val) { -+ sd(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); ++void MacroAssembler::neg(Register Rd, Register Rs) { ++ sub(Rd, x0, Rs); +} + -+void InterpreterMacroAssembler::load_float(Address src) { -+ flw(f10, src); ++void MacroAssembler::negw(Register Rd, Register Rs) { ++ subw(Rd, x0, Rs); +} + -+void InterpreterMacroAssembler::load_double(Address src) { -+ fld(f10, src); ++void MacroAssembler::sext_w(Register Rd, Register Rs) { ++ addiw(Rd, Rs, 0); +} + -+void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { -+ // set sender sp -+ mv(x30, sp); -+ // record last_sp -+ sd(esp, 
Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++void MacroAssembler::zext_b(Register Rd, Register Rs) { ++ andi(Rd, Rs, 0xFF); +} + -+// Jump to from_interpreted entry of a call unless single stepping is possible -+// in this thread in which case we must call the i2i entry -+void InterpreterMacroAssembler::jump_from_interpreted(Register method) { -+ prepare_to_jump_from_interpreted(); -+ if (JvmtiExport::can_post_interpreter_events()) { -+ Label run_compiled_code; -+ // JVMTI events, such as single-stepping, are implemented partly by avoiding running -+ // compiled code in threads for which the event is enabled. Check here for -+ // interp_only_mode if these events CAN be enabled. -+ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); -+ beqz(t0, run_compiled_code); -+ ld(t0, Address(method, Method::interpreter_entry_offset())); -+ jr(t0); -+ bind(run_compiled_code); -+ } ++void MacroAssembler::seqz(Register Rd, Register Rs) { ++ sltiu(Rd, Rs, 1); ++} + -+ ld(t0, Address(method, Method::from_interpreted_offset())); -+ jr(t0); ++void MacroAssembler::snez(Register Rd, Register Rs) { ++ sltu(Rd, x0, Rs); +} + -+// The following two routines provide a hook so that an implementation -+// can schedule the dispatch in two parts. amd64 does not do this. -+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++void MacroAssembler::sltz(Register Rd, Register Rs) { ++ slt(Rd, Rs, x0); +} + -+void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { -+ dispatch_next(state, step); ++void MacroAssembler::sgtz(Register Rd, Register Rs) { ++ slt(Rd, x0, Rs); +} + -+void InterpreterMacroAssembler::dispatch_base(TosState state, -+ address* table, -+ bool verifyoop, -+ bool generate_poll, -+ Register Rs) { -+ // Pay attention to the argument Rs, which is acquiesce in t0. 
-+ if (VerifyActivationFrameSize) { -+ Unimplemented(); -+ } -+ if (verifyoop && state == atos) { -+ verify_oop(x10); ++void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) { ++ if (Rd != Rs) { ++ fsgnj_s(Rd, Rs, Rs); + } ++} + -+ Label safepoint; -+ address* const safepoint_table = Interpreter::safept_table(state); -+ bool needs_thread_local_poll = generate_poll && table != safepoint_table; ++void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjx_s(Rd, Rs, Rs); ++} + -+ if (needs_thread_local_poll) { -+ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); -+ ld(t1, Address(xthread, JavaThread::polling_word_offset())); -+ andi(t1, t1, SafepointMechanism::poll_bit()); -+ bnez(t1, safepoint); -+ } -+ if (table == Interpreter::dispatch_table(state)) { -+ li(t1, Interpreter::distance_from_dispatch_table(state)); -+ add(t1, Rs, t1); -+ shadd(t1, t1, xdispatch, t1, 3); -+ } else { -+ mv(t1, (address)table); -+ shadd(t1, Rs, t1, Rs, 3); -+ } -+ ld(t1, Address(t1)); -+ jr(t1); ++void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjn_s(Rd, Rs, Rs); ++} + -+ if (needs_thread_local_poll) { -+ bind(safepoint); -+ la(t1, ExternalAddress((address)safepoint_table)); -+ shadd(t1, Rs, t1, Rs, 3); -+ ld(t1, Address(t1)); -+ jr(t1); ++void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) { ++ if (Rd != Rs) { ++ fsgnj_d(Rd, Rs, Rs); + } +} + -+void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll, Register Rs) { -+ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll, Rs); ++void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjx_d(Rd, Rs, Rs); +} + -+void InterpreterMacroAssembler::dispatch_only_normal(TosState state, Register Rs) { -+ dispatch_base(state, Interpreter::normal_table(state), Rs); ++void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjn_d(Rd, Rs, Rs); +} + -+void InterpreterMacroAssembler::dispatch_only_noverify(TosState state, Register Rs) { -+ dispatch_base(state, Interpreter::normal_table(state), false, Rs); ++void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) { ++ vmnand_mm(vd, vs, vs); +} + -+void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { -+ // load next bytecode -+ load_unsigned_byte(t0, Address(xbcp, step)); -+ add(xbcp, xbcp, step); -+ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) { ++ vnsrl_wx(vd, vs, x0, vm); +} + -+void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { -+ // load current bytecode -+ lbu(t0, Address(xbcp, 0)); -+ dispatch_base(state, table); ++void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { ++ vfsgnjn_vv(vd, vs, vs); +} + -+// remove activation -+// -+// Apply stack watermark barrier. -+// Unlock the receiver if this is a synchronized method. -+// Unlock any Java monitors from syncronized blocks. -+// Remove the activation from the stack. 
-+// -+// If there are locked Java monitors -+// If throw_monitor_exception -+// throws IllegalMonitorStateException -+// Else if install_monitor_exception -+// installs IllegalMonitorStateException -+// Else -+// no error processing -+void InterpreterMacroAssembler::remove_activation( -+ TosState state, -+ bool throw_monitor_exception, -+ bool install_monitor_exception, -+ bool notify_jvmdi) { -+ // Note: Registers x13 may be in use for the -+ // result check if synchronized method -+ Label unlocked, unlock, no_unlock; -+ -+ // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, -+ // that would normally not be safe to use. Such bad returns into unsafe territory of -+ // the stack, will call InterpreterRuntime::at_unwind. -+ Label slow_path; -+ Label fast_path; -+ safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); -+ j(fast_path); -+ -+ bind(slow_path); -+ push(state); -+ set_last_Java_frame(esp, fp, (address)pc(), t0); -+ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), xthread); -+ reset_last_Java_frame(true); -+ pop(state); ++void MacroAssembler::la(Register Rd, const address &dest) { ++ int64_t offset = dest - pc(); ++ if (is_offset_in_range(offset, 32)) { ++ auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit ++ addi(Rd, Rd, ((int64_t)offset << 52) >> 52); ++ } else { ++ movptr(Rd, dest); ++ } ++} + -+ bind(fast_path); ++void MacroAssembler::la(Register Rd, const Address &adr) { ++ InstructionMark im(this); ++ code_section()->relocate(inst_mark(), adr.rspec()); ++ relocInfo::relocType rtype = adr.rspec().reloc()->type(); + -+ // get the value of _do_not_unlock_if_synchronized into x13 -+ const Address do_not_unlock_if_synchronized(xthread, -+ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); -+ lbu(x13, do_not_unlock_if_synchronized); -+ sb(zr, do_not_unlock_if_synchronized); // reset the flag ++ switch (adr.getMode()) { ++ case Address::literal: { ++ if (rtype == relocInfo::none) { ++ li(Rd, (intptr_t)(adr.target())); ++ } else { ++ movptr(Rd, adr.target()); ++ } ++ break; ++ } ++ case Address::base_plus_offset: { ++ int32_t offset = 0; ++ baseOffset(Rd, adr, offset); ++ addi(Rd, Rd, offset); ++ break; ++ } ++ default: ++ ShouldNotReachHere(); ++ } ++} + -+ // get method access flags -+ ld(x11, Address(fp, frame::interpreter_frame_method_offset * wordSize)); -+ ld(x12, Address(x11, Method::access_flags_offset())); -+ andi(t0, x12, JVM_ACC_SYNCHRONIZED); -+ beqz(t0, unlocked); ++void MacroAssembler::la(Register Rd, Label &label) { ++ la(Rd, target(label)); ++} + -+ // Don't unlock anything if the _do_not_unlock_if_synchronized flag -+ // is set. -+ bnez(x13, no_unlock); ++#define INSN(NAME) \ ++ void MacroAssembler::NAME##z(Register Rs, const address &dest) { \ ++ NAME(Rs, zr, dest); \ ++ } \ ++ void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ ++ NAME(Rs, zr, l, is_far); \ ++ } \ + -+ // unlock monitor -+ push(state); // save result ++ INSN(beq); ++ INSN(bne); ++ INSN(blt); ++ INSN(ble); ++ INSN(bge); ++ INSN(bgt); + -+ // BasicObjectLock will be first in list, since this is a -+ // synchronized method. However, need to check that the object has -+ // not been unlocked by an explicit monitorexit bytecode. 
-+ const Address monitor(fp, frame::interpreter_frame_initial_sp_offset * -+ wordSize - (int) sizeof(BasicObjectLock)); -+ // We use c_rarg1 so that if we go slow path it will be the correct -+ // register for unlock_object to pass to VM directly -+ la(c_rarg1, monitor); // address of first monitor ++#undef INSN + -+ ld(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); -+ bnez(x10, unlock); ++// Float compare branch instructions + -+ pop(state); -+ if (throw_monitor_exception) { -+ // Entry already unlocked, need to throw exception -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_illegal_monitor_state_exception)); -+ should_not_reach_here(); -+ } else { -+ // Monitor already unlocked during a stack unroll. If requested, -+ // install an illegal_monitor_state_exception. Continue with -+ // stack unrolling. -+ if (install_monitor_exception) { -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::new_illegal_monitor_state_exception)); -+ } -+ j(unlocked); ++#define INSN(NAME, FLOATCMP, BRANCH) \ ++ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ ++ FLOATCMP##_s(t0, Rs1, Rs2); \ ++ BRANCH(t0, l, is_far); \ ++ } \ ++ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ ++ FLOATCMP##_d(t0, Rs1, Rs2); \ ++ BRANCH(t0, l, is_far); \ + } + -+ bind(unlock); -+ unlock_object(c_rarg1); -+ pop(state); ++ INSN(beq, feq, bnez); ++ INSN(bne, feq, beqz); + -+ // Check that for block-structured locking (i.e., that all locked -+ // objects has been unlocked) -+ bind(unlocked); ++#undef INSN + -+ // x10: Might contain return value + -+ // Check that all monitors are unlocked -+ { -+ Label loop, exception, entry, restart; -+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; -+ const Address monitor_block_top( -+ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ const Address monitor_block_bot( -+ fp, frame::interpreter_frame_initial_sp_offset * wordSize); ++#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ ++ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ if (is_unordered) { \ ++ /* jump if either source is NaN or condition is expected */ \ ++ FLOATCMP2##_s(t0, Rs2, Rs1); \ ++ beqz(t0, l, is_far); \ ++ } else { \ ++ /* jump if no NaN in source and condition is expected */ \ ++ FLOATCMP1##_s(t0, Rs1, Rs2); \ ++ bnez(t0, l, is_far); \ ++ } \ ++ } \ ++ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ if (is_unordered) { \ ++ /* jump if either source is NaN or condition is expected */ \ ++ FLOATCMP2##_d(t0, Rs2, Rs1); \ ++ beqz(t0, l, is_far); \ ++ } else { \ ++ /* jump if no NaN in source and condition is expected */ \ ++ FLOATCMP1##_d(t0, Rs1, Rs2); \ ++ bnez(t0, l, is_far); \ ++ } \ ++ } + -+ bind(restart); -+ // We use c_rarg1 so that if we go slow path it will be the correct -+ // register for unlock_object to pass to VM directly -+ ld(c_rarg1, monitor_block_top); // points to current entry, starting -+ // with top-most entry -+ la(x9, monitor_block_bot); // points to word before bottom of -+ // monitor block ++ INSN(ble, fle, flt); ++ INSN(blt, flt, fle); + -+ j(entry); ++#undef INSN + -+ // Entry already locked, need to throw exception -+ bind(exception); ++#define INSN(NAME, CMP) \ ++ void MacroAssembler::float_##NAME(FloatRegister 
Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ ++ } \ ++ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ ++ } + -+ if (throw_monitor_exception) { -+ // Throw exception -+ MacroAssembler::call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime:: -+ throw_illegal_monitor_state_exception)); ++ INSN(bgt, blt); ++ INSN(bge, ble); + -+ should_not_reach_here(); -+ } else { -+ // Stack unrolling. Unlock object and install illegal_monitor_exception. -+ // Unlock does not block, so don't have to worry about the frame. -+ // We don't have to preserve c_rarg1 since we are going to throw an exception. ++#undef INSN + -+ push(state); -+ unlock_object(c_rarg1); -+ pop(state); + -+ if (install_monitor_exception) { -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime:: -+ new_illegal_monitor_state_exception)); -+ } ++#define INSN(NAME, CSR) \ ++ void MacroAssembler::NAME(Register Rd) { \ ++ csrr(Rd, CSR); \ ++ } + -+ j(restart); -+ } ++ INSN(rdinstret, CSR_INSTERT); ++ INSN(rdcycle, CSR_CYCLE); ++ INSN(rdtime, CSR_TIME); ++ INSN(frcsr, CSR_FCSR); ++ INSN(frrm, CSR_FRM); ++ INSN(frflags, CSR_FFLAGS); + -+ bind(loop); -+ // check if current entry is used -+ add(t0, c_rarg1, BasicObjectLock::obj_offset_in_bytes()); -+ ld(t0, Address(t0, 0)); -+ bnez(t0, exception); ++#undef INSN + -+ add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry -+ bind(entry); -+ bne(c_rarg1, x9, loop); // check if bottom reached if not at bottom then check this entry ++void MacroAssembler::csrr(Register Rd, unsigned csr) { ++ csrrs(Rd, csr, x0); ++} ++ ++#define INSN(NAME, OPFUN) \ ++ void MacroAssembler::NAME(unsigned csr, Register Rs) { \ ++ OPFUN(x0, csr, Rs); \ + } + -+ bind(no_unlock); ++ INSN(csrw, csrrw); ++ INSN(csrs, csrrs); ++ INSN(csrc, csrrc); + -+ // jvmti support -+ if (notify_jvmdi) { -+ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++#undef INSN + -+ } else { -+ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++#define INSN(NAME, OPFUN) \ ++ void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ ++ OPFUN(x0, csr, imm); \ + } + -+ // remove activation -+ // get sender esp -+ ld(t1, -+ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); -+ if (StackReservedPages > 0) { -+ // testing if reserved zone needs to be re-enabled -+ Label no_reserved_zone_enabling; ++ INSN(csrwi, csrrwi); ++ INSN(csrsi, csrrsi); ++ INSN(csrci, csrrci); + -+ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); -+ ble(t1, t0, no_reserved_zone_enabling); ++#undef INSN + -+ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), xthread); -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_delayed_StackOverflowError)); -+ should_not_reach_here(); ++#define INSN(NAME, CSR) \ ++ void MacroAssembler::NAME(Register Rd, Register Rs) { \ ++ csrrw(Rd, CSR, Rs); \ ++ } + -+ bind(no_reserved_zone_enabling); ++ INSN(fscsr, CSR_FCSR); ++ INSN(fsrm, CSR_FRM); ++ INSN(fsflags, CSR_FFLAGS); ++ ++#undef INSN ++ ++#define INSN(NAME) \ ++ void MacroAssembler::NAME(Register Rs) { \ ++ NAME(x0, Rs); \ + } + -+ // restore sender esp -+ mv(esp, t1); ++ INSN(fscsr); ++ INSN(fsrm); ++ INSN(fsflags); + -+ // remove frame anchor -+ leave(); -+ // If we're returning to interpreted code we will shortly be -+ // 
adjusting SP to allow some space for ESP. If we're returning to -+ // compiled code the saved sender SP was saved in sender_sp, so this -+ // restores it. -+ andi(sp, esp, -16); ++#undef INSN ++ ++void MacroAssembler::fsrmi(Register Rd, unsigned imm) { ++ guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register"); ++ csrrwi(Rd, CSR_FRM, imm); +} + -+// Lock object -+// -+// Args: -+// c_rarg1: BasicObjectLock to be used for locking -+// -+// Kills: -+// x10 -+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs) -+// t0, t1 (temp regs) -+void InterpreterMacroAssembler::lock_object(Register lock_reg) -+{ -+ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1"); -+ if (UseHeavyMonitors) { -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), -+ lock_reg); -+ } else { -+ Label done; -+ -+ const Register swap_reg = x10; -+ const Register tmp = c_rarg2; -+ const Register obj_reg = c_rarg3; // Will contain the oop -+ -+ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); -+ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); -+ const int mark_offset = lock_offset + -+ BasicLock::displaced_header_offset_in_bytes(); -+ -+ Label slow_case; -+ -+ // Load object pointer into obj_reg c_rarg3 -+ ld(obj_reg, Address(lock_reg, obj_offset)); -+ -+ if (DiagnoseSyncOnValueBasedClasses != 0) { -+ load_klass(tmp, obj_reg); -+ lwu(tmp, Address(tmp, Klass::access_flags_offset())); -+ andi(tmp, tmp, JVM_ACC_IS_VALUE_BASED_CLASS); -+ bnez(tmp, slow_case); -+ } ++void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { ++ csrrwi(Rd, CSR_FFLAGS, imm); ++} + -+ // Load (object->mark() | 1) into swap_reg -+ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ ori(swap_reg, t0, 1); ++#define INSN(NAME) \ ++ void MacroAssembler::NAME(unsigned imm) { \ ++ NAME(x0, imm); \ ++ } + -+ // Save (object->mark() | 1) into BasicLock's displaced header -+ sd(swap_reg, Address(lock_reg, mark_offset)); ++ INSN(fsrmi); ++ INSN(fsflagsi); + -+ assert(lock_offset == 0, -+ "displached header must be first word in BasicObjectLock"); ++#undef INSN + -+ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++void MacroAssembler::push_reg(Register Rs) ++{ ++ addi(esp, esp, 0 - wordSize); ++ sd(Rs, Address(esp, 0)); ++} + -+ // Test if the oopMark is an obvious stack pointer, i.e., -+ // 1) (mark & 7) == 0, and -+ // 2) sp <= mark < mark + os::pagesize() -+ // -+ // These 3 tests can be done by evaluating the following -+ // expression: ((mark - sp) & (7 - os::vm_page_size())), -+ // assuming both stack pointer and pagesize have their -+ // least significant 3 bits clear. -+ // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg -+ sub(swap_reg, swap_reg, sp); -+ li(t0, (int64_t)(7 - os::vm_page_size())); -+ andr(swap_reg, swap_reg, t0); ++void MacroAssembler::pop_reg(Register Rd) ++{ ++ ld(Rd, esp, 0); ++ addi(esp, esp, wordSize); ++} + -+ // Save the test result, for recursive case, the result is zero -+ sd(swap_reg, Address(lock_reg, mark_offset)); -+ beqz(swap_reg, done); ++int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { ++ int count = 0; ++ // Scan bitset to accumulate register pairs ++ for (int reg = 31; reg >= 0; reg--) { ++ if ((1U << 31) & bitset) { ++ regs[count++] = reg; ++ } ++ bitset <<= 1; ++ } ++ return count; ++} + -+ bind(slow_case); ++// Push lots of registers in the bit set supplied. Don't push sp. 
++// Return the number of words pushed ++int MacroAssembler::push_reg(unsigned int bitset, Register stack) { ++ DEBUG_ONLY(int words_pushed = 0;) ++ CompressibleRegion cr(this); + -+ // Call the runtime routine for slow case -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), -+ lock_reg); ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ // reserve one slot to align for odd count ++ int offset = is_even(count) ? 0 : wordSize; + -+ bind(done); ++ if (count) { ++ addi(stack, stack, - count * wordSize - offset); ++ } ++ for (int i = count - 1; i >= 0; i--) { ++ sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); ++ DEBUG_ONLY(words_pushed ++;) + } -+} -+ + -+// Unlocks an object. Used in monitorexit bytecode and -+// remove_activation. Throws an IllegalMonitorException if object is -+// not locked by current thread. -+// -+// Args: -+// c_rarg1: BasicObjectLock for lock -+// -+// Kills: -+// x10 -+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) -+// t0, t1 (temp regs) -+void InterpreterMacroAssembler::unlock_object(Register lock_reg) -+{ -+ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); ++ assert(words_pushed == count, "oops, pushed != count"); + -+ if (UseHeavyMonitors) { -+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); -+ } else { -+ Label done; ++ return count; ++} + -+ const Register swap_reg = x10; -+ const Register header_reg = c_rarg2; // Will contain the old oopMark -+ const Register obj_reg = c_rarg3; // Will contain the oop ++int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { ++ DEBUG_ONLY(int words_popped = 0;) ++ CompressibleRegion cr(this); + -+ save_bcp(); // Save in case of exception ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ // reserve one slot to align for odd count ++ int offset = is_even(count) ? 0 : wordSize; + -+ // Convert from BasicObjectLock structure to object and BasicLock -+ // structure Store the BasicLock address into x10 -+ la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); ++ for (int i = count - 1; i >= 0; i--) { ++ ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); ++ DEBUG_ONLY(words_popped ++;) ++ } + -+ // Load oop into obj_reg(c_rarg3) -+ ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); ++ if (count) { ++ addi(stack, stack, count * wordSize + offset); ++ } ++ assert(words_popped == count, "oops, popped != count"); + -+ // Free entry -+ sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); ++ return count; ++} + -+ // Load the old header from BasicLock structure -+ ld(header_reg, Address(swap_reg, -+ BasicLock::displaced_header_offset_in_bytes())); ++// Push float registers in the bitset, except sp. ++// Return the number of heapwords pushed. 
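++// The slot count is rounded up to an even number of words (push_slots/pop_slots below), presumably to keep sp 16-byte aligned.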
++int MacroAssembler::push_fp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int words_pushed = 0; ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ int push_slots = count + (count & 1); + -+ // Test for recursion -+ beqz(header_reg, done); ++ if (count) { ++ addi(stack, stack, -push_slots * wordSize); ++ } + -+ // Atomic swap back the old header -+ cmpxchg_obj_header(swap_reg, header_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++ for (int i = count - 1; i >= 0; i--) { ++ fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize)); ++ words_pushed++; ++ } + -+ // Call the runtime routine for slow case. -+ sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj -+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); ++ return count; ++} + -+ bind(done); ++int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int words_popped = 0; ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ int pop_slots = count + (count & 1); + -+ restore_bcp(); ++ for (int i = count - 1; i >= 0; i--) { ++ fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize)); ++ words_popped++; + } -+} + ++ if (count) { ++ addi(stack, stack, pop_slots * wordSize); ++ } + -+void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, -+ Label& zero_continue) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ ld(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); -+ beqz(mdp, zero_continue); ++ assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count); ++ return count; +} + -+// Set the method data pointer for the current bcp. -+void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ Label set_mdp; -+ push_reg(0xc00, sp); // save x10, x11 ++void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { ++ CompressibleRegion cr(this); ++ // Push integer registers x7, x10-x17, x28-x31. ++ push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); + -+ // Test MDO to avoid the call if it is NULL. -+ ld(x10, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ beqz(x10, set_mdp); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), xmethod, xbcp); -+ // x10: mdi -+ // mdo is guaranteed to be non-zero here, we checked for it before the call. -+ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ la(x11, Address(x11, in_bytes(MethodData::data_offset()))); -+ add(x10, x11, x10); -+ sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); -+ bind(set_mdp); -+ pop_reg(0xc00, sp); ++ // Push float registers f0-f7, f10-f17, f28-f31. 
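++ // That is 20 registers in total (8 + 8 + 4), which matches the 20-word stack adjustment below.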
++ addi(sp, sp, - wordSize * 20); ++ int offset = 0; ++ for (int i = 0; i < 32; i++) { ++ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { ++ fsd(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); ++ } ++ } +} + -+void InterpreterMacroAssembler::verify_method_data_pointer() { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+#ifdef ASSERT -+ Label verify_continue; -+ add(sp, sp, -4 * wordSize); -+ sd(x10, Address(sp, 0)); -+ sd(x11, Address(sp, wordSize)); -+ sd(x12, Address(sp, 2 * wordSize)); -+ sd(x13, Address(sp, 3 * wordSize)); -+ test_method_data_pointer(x13, verify_continue); // If mdp is zero, continue -+ get_method(x11); ++void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { ++ CompressibleRegion cr(this); ++ int offset = 0; ++ for (int i = 0; i < 32; i++) { ++ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { ++ fld(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); ++ } ++ } ++ addi(sp, sp, wordSize * 20); + -+ // If the mdp is valid, it will point to a DataLayout header which is -+ // consistent with the bcp. The converse is highly probable also. -+ lh(x12, Address(x13, in_bytes(DataLayout::bci_offset()))); -+ ld(t0, Address(x11, Method::const_offset())); -+ add(x12, x12, t0); -+ la(x12, Address(x12, ConstMethod::codes_offset())); -+ beq(x12, xbcp, verify_continue); -+ // x10: method -+ // xbcp: bcp // xbcp == 22 -+ // x13: mdp -+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), -+ x11, xbcp, x13); -+ bind(verify_continue); -+ ld(x10, Address(sp, 0)); -+ ld(x11, Address(sp, wordSize)); -+ ld(x12, Address(sp, 2 * wordSize)); -+ ld(x13, Address(sp, 3 * wordSize)); -+ add(sp, sp, 4 * wordSize); -+#endif // ASSERT ++ pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); +} + ++// Push all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). ++void MacroAssembler::pusha() { ++ CompressibleRegion cr(this); ++ push_reg(0xffffffe2, sp); ++} + -+void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, -+ int constant, -+ Register value) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ Address data(mdp_in, constant); -+ sd(value, data); ++// Pop all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). 
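++// The mask 0xffffffe2 encodes that set: bits 0, 2, 3 and 4 are clear, so x1 and x5-x31 are saved/restored.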
++void MacroAssembler::popa() { ++ CompressibleRegion cr(this); ++ pop_reg(0xffffffe2, sp); +} + ++void MacroAssembler::push_CPU_state() { ++ CompressibleRegion cr(this); ++ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) ++ push_reg(0xffffffe0, sp); + -+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, -+ int constant, -+ bool decrement) { -+ increment_mdp_data_at(mdp_in, noreg, constant, decrement); ++ // float registers ++ addi(sp, sp, - 32 * wordSize); ++ for (int i = 0; i < 32; i++) { ++ fsd(as_FloatRegister(i), Address(sp, i * wordSize)); ++ } +} + -+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, -+ Register reg, -+ int constant, -+ bool decrement) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ // %%% this does 64bit counters at best it is wasting space -+ // at worst it is a rare bug when counters overflow -+ -+ assert_different_registers(t1, t0, mdp_in, reg); ++void MacroAssembler::pop_CPU_state() { ++ CompressibleRegion cr(this); + -+ Address addr1(mdp_in, constant); -+ Address addr2(t1, 0); -+ Address &addr = addr1; -+ if (reg != noreg) { -+ la(t1, addr1); -+ add(t1, t1, reg); -+ addr = addr2; ++ // float registers ++ for (int i = 0; i < 32; i++) { ++ fld(as_FloatRegister(i), Address(sp, i * wordSize)); + } ++ addi(sp, sp, 32 * wordSize); + -+ if (decrement) { -+ ld(t0, addr); -+ addi(t0, t0, -DataLayout::counter_increment); -+ Label L; -+ bltz(t0, L); // skip store if counter underflow -+ sd(t0, addr); -+ bind(L); -+ } else { -+ assert(DataLayout::counter_increment == 1, -+ "flow-free idiom only works with 1"); -+ ld(t0, addr); -+ addi(t0, t0, DataLayout::counter_increment); -+ Label L; -+ blez(t0, L); // skip store if counter overflow -+ sd(t0, addr); -+ bind(L); -+ } ++ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) ++ pop_reg(0xffffffe0, sp); +} + -+void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, -+ int flag_byte_constant) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ int flags_offset = in_bytes(DataLayout::flags_offset()); -+ // Set the flag -+ lbu(t1, Address(mdp_in, flags_offset)); -+ ori(t1, t1, flag_byte_constant); -+ sb(t1, Address(mdp_in, flags_offset)); ++static int patch_offset_in_jal(address branch, int64_t offset) { ++ assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction!\n"); ++ Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31] ++ Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21] ++ Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20] ++ Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12] ++ return NativeInstruction::instruction_size; // only one instruction +} + -+ -+void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, -+ int offset, -+ Register value, -+ Register test_value_out, -+ Label& not_equal_continue) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ if (test_value_out == noreg) { -+ ld(t1, Address(mdp_in, offset)); -+ bne(value, t1, not_equal_continue); -+ } else { -+ // Put the test value into a register, so caller can use it: -+ ld(test_value_out, Address(mdp_in, offset)); -+ bne(value, test_value_out, not_equal_continue); -+ } ++static int patch_offset_in_conditional_branch(address branch, int64_t offset) { ++ assert(is_imm_in_range(offset, 12, 1), "offset is too large to be 
patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n"); ++ Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31] ++ Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25] ++ Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7] ++ Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] ++ return NativeInstruction::instruction_size; // only one instruction +} + ++static int patch_offset_in_pc_relative(address branch, int64_t offset) { ++ const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load ++ Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12] ++ Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20] ++ return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size; ++} + ++static int patch_addr_in_movptr(address branch, address target) { ++ const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load ++ int32_t lower = ((intptr_t)target << 35) >> 35; ++ int64_t upper = ((intptr_t)target - lower) >> 29; ++ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12] ++ Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20] ++ Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20] ++ Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. target[ 5: 0] ==> branch[31:20] ++ return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; +} + ++static int patch_imm_in_li64(address branch, address target) { ++ const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi ++ int64_t lower = (intptr_t)target & 0xffffffff; ++ lower = lower - ((lower << 44) >> 44); ++ int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower; ++ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; ++ int64_t tmp_upper = upper, tmp_lower = upper; ++ tmp_lower = (tmp_lower << 52) >> 52; ++ tmp_upper -= tmp_lower; ++ tmp_upper >>= 12; ++ // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:28] == 0x7ff && target[19] == 1), ++ // upper = target[63:32] + 1. ++ Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui. ++ Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi. ++ // Load the rest 32 bits. ++ Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi. ++ Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi. ++ Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi.
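++ // Note: only the lui and the four addi instructions are patched; the slli instructions at branch + 8, + 16 and + 24 keep their fixed shift amounts, so the sequence is still LI64_INSTRUCTIONS_NUM instructions long.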
++ return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; +} + ++static int patch_imm_in_li32(address branch, int32_t target) { ++ const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw ++ int64_t upper = (intptr_t)target; ++ int32_t lower = (((int32_t)target) << 20) >> 20; ++ upper -= lower; ++ upper = (int32_t)upper; ++ Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui. ++ Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw. ++ return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; ++} + -+void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, -+ int constant) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ addi(mdp_in, mdp_in, (unsigned)constant); -+ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++static long get_offset_of_jal(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ long offset = 0; ++ unsigned insn = *(unsigned*)insn_addr; ++ long val = (long)Assembler::sextract(insn, 31, 12); ++ offset |= ((val >> 19) & 0x1) << 20; ++ offset |= (val & 0xff) << 12; ++ offset |= ((val >> 8) & 0x1) << 11; ++ offset |= ((val >> 9) & 0x3ff) << 1; ++ offset = (offset << 43) >> 43; ++ return offset; +} + ++static long get_offset_of_conditional_branch(address insn_addr) { ++ long offset = 0; ++ assert_cond(insn_addr != NULL); ++ unsigned insn = *(unsigned*)insn_addr; ++ offset = (long)Assembler::sextract(insn, 31, 31); ++ offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); ++ offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); ++ offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); ++ offset = (offset << 41) >> 41; ++ return offset; ++} + -+void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); ++static long get_offset_of_pc_relative(address insn_addr) { ++ long offset = 0; ++ assert_cond(insn_addr != NULL); ++ offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12; // Auipc. ++ offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addi/Jalr/Load. ++ offset = (offset << 32) >> 32; ++ return offset; ++} + -+ // save/restore across call_VM -+ addi(sp, sp, -2 * wordSize); -+ sd(zr, Address(sp, 0)); -+ sd(return_bci, Address(sp, wordSize)); -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), -+ return_bci); -+ ld(zr, Address(sp, 0)); -+ ld(return_bci, Address(sp, wordSize)); -+ addi(sp, sp, 2 * wordSize); ++static address get_target_of_movptr(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 29; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 17; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 6; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. ++ return (address) target_address; +} + -+void InterpreterMacroAssembler::profile_taken_branch(Register mdp, -+ Register bumped_count) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++static address get_target_of_li64(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44; // Lui. 
++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20)); // Addi. ++ return (address)target_address; ++} + -+ // If no method data exists, go to profile_continue. -+ // Otherwise, assign to mdp -+ test_method_data_pointer(mdp, profile_continue); ++static address get_target_of_li32(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addiw. ++ return (address)target_address; ++} + -+ // We are taking a branch. Increment the taken count. -+ Address data(mdp, in_bytes(JumpData::taken_offset())); -+ ld(bumped_count, data); -+ assert(DataLayout::counter_increment == 1, -+ "flow-free idiom only works with 1"); -+ addi(bumped_count, bumped_count, DataLayout::counter_increment); -+ Label L; -+ // eg: bumped_count=0x7fff ffff ffff ffff + 1 < 0. so we use <= 0; -+ blez(bumped_count, L); // skip store if counter overflow, -+ sd(bumped_count, data); -+ bind(L); -+ // The method data pointer needs to be updated to reflect the new target. -+ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); -+ bind(profile_continue); ++// Patch any kind of instruction; there may be several instructions. ++// Return the total length (in bytes) of the instructions. ++int MacroAssembler::pd_patch_instruction_size(address branch, address target) { ++ assert_cond(branch != NULL); ++ int64_t offset = target - branch; ++ if (NativeInstruction::is_jal_at(branch)) { // jal ++ return patch_offset_in_jal(branch, offset); ++ } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne ++ return patch_offset_in_conditional_branch(branch, offset); ++ } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load ++ return patch_offset_in_pc_relative(branch, offset); ++ } else if (NativeInstruction::is_movptr_at(branch)) { // movptr ++ return patch_addr_in_movptr(branch, target); ++ } else if (NativeInstruction::is_li64_at(branch)) { // li64 ++ return patch_imm_in_li64(branch, target); ++ } else if (NativeInstruction::is_li32_at(branch)) { // li32 ++ int64_t imm = (intptr_t)target; ++ return patch_imm_in_li32(branch, (int32_t)imm); ++ } else { ++#ifdef ASSERT ++ tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", ++ *(unsigned*)branch, p2i(branch)); ++ Disassembler::decode(branch - 16, branch + 16); ++#endif ++ ShouldNotReachHere(); ++ return -1; + } +} + -+void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // We are taking a branch. Increment the not taken count. 
-+ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); -+ -+ // The method data pointer needs to be updated to correspond to -+ // the next bytecode -+ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); -+ bind(profile_continue); ++address MacroAssembler::target_addr_for_insn(address insn_addr) { ++ long offset = 0; ++ assert_cond(insn_addr != NULL); ++ if (NativeInstruction::is_jal_at(insn_addr)) { // jal ++ offset = get_offset_of_jal(insn_addr); ++ } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne ++ offset = get_offset_of_conditional_branch(insn_addr); ++ } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load ++ offset = get_offset_of_pc_relative(insn_addr); ++ } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr ++ return get_target_of_movptr(insn_addr); ++ } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64 ++ return get_target_of_li64(insn_addr); ++ } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32 ++ return get_target_of_li32(insn_addr); ++ } else { ++ ShouldNotReachHere(); + } ++ return address(((uintptr_t)insn_addr + offset)); +} + -+void InterpreterMacroAssembler::profile_call(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // We are making a call. Increment the count. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); -+ -+ // The method data pointer needs to be updated to reflect the new target. -+ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); -+ bind(profile_continue); ++int MacroAssembler::patch_oop(address insn_addr, address o) { ++ // OOPs are either narrow (32 bits) or wide (48 bits). We encode ++ // narrow OOPs by setting the upper 16 bits in the first ++ // instruction. ++ if (NativeInstruction::is_li32_at(insn_addr)) { ++ // Move narrow OOP ++ narrowOop n = CompressedOops::encode((oop)o); ++ return patch_imm_in_li32(insn_addr, (int32_t)n); ++ } else if (NativeInstruction::is_movptr_at(insn_addr)) { ++ // Move wide OOP ++ return patch_addr_in_movptr(insn_addr, o); + } ++ ShouldNotReachHere(); ++ return -1; +} + -+void InterpreterMacroAssembler::profile_final_call(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // We are making a call. Increment the count. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); -+ -+ // The method data pointer needs to be updated to reflect the new target. -+ update_mdp_by_constant(mdp, -+ in_bytes(VirtualCallData:: -+ virtual_call_data_size())); -+ bind(profile_continue); ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops) { ++ if (Universe::is_fully_initialized()) { ++ mv(xheapbase, Universe::narrow_ptrs_base()); ++ } else { ++ int32_t offset = 0; ++ la_patchable(xheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()), offset); ++ ld(xheapbase, Address(xheapbase, offset)); ++ } + } +} + -+ -+void InterpreterMacroAssembler::profile_virtual_call(Register receiver, -+ Register mdp, -+ Register reg2, -+ bool receiver_can_be_null) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. 
-+ test_method_data_pointer(mdp, profile_continue); -+ -+ Label skip_receiver_profile; -+ if (receiver_can_be_null) { -+ Label not_null; -+ // We are making a call. Increment the count for null receiver. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); -+ j(skip_receiver_profile); -+ bind(not_null); -+ } -+ -+ // Record the receiver type. -+ record_klass_in_profile(receiver, mdp, reg2, true); -+ bind(skip_receiver_profile); -+ -+ // The method data pointer needs to be updated to reflect the new target. -+ -+ update_mdp_by_constant(mdp, -+ in_bytes(VirtualCallData:: -+ virtual_call_data_size())); -+ bind(profile_continue); -+ } ++void MacroAssembler::mv(Register Rd, Address dest) { ++ assert(dest.getMode() == Address::literal, "Address mode should be Address::literal"); ++ code_section()->relocate(pc(), dest.rspec()); ++ movptr(Rd, dest.target()); +} + -+// This routine creates a state machine for updating the multi-row -+// type profile at a virtual call site (or other type-sensitive bytecode). -+// The machine visits each row (of receiver/count) until the receiver type -+// is found, or until it runs out of rows. At the same time, it remembers -+// the location of the first empty row. (An empty row records null for its -+// receiver, and can be allocated for a newly-observed receiver type.) -+// Because there are two degrees of freedom in the state, a simple linear -+// search will not work; it must be a decision tree. Hence this helper -+// function is recursive, to generate the required tree structured code. -+// It's the interpreter, so we are trading off code space for speed. -+// See below for example code. -+void InterpreterMacroAssembler::record_klass_in_profile_helper( -+ Register receiver, Register mdp, -+ Register reg2, -+ Label& done, bool is_virtual_call) { -+ if (TypeProfileWidth == 0) { -+ if (is_virtual_call) { -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); -+ } ++void MacroAssembler::mv(Register Rd, address addr) { ++ // Here in case of use with relocation, use fix length instruction ++ // movptr instead of li ++ movptr(Rd, addr); ++} + ++void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { ++ if (src.is_register()) { ++ mv(Rd, src.as_register()); + } else { -+ int non_profiled_offset = -1; -+ if (is_virtual_call) { -+ non_profiled_offset = in_bytes(CounterData::count_offset()); -+ } -+ -+ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, -+ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); ++ mv(Rd, src.as_constant()); + } +} + -+void InterpreterMacroAssembler::record_item_in_profile_helper( -+ Register item, Register mdp, Register reg2, int start_row, Label& done, int total_rows, -+ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, int non_profiled_offset) { -+ int last_row = total_rows - 1; -+ assert(start_row <= last_row, "must be work left to do"); -+ // Test this row for both the item and for null. -+ // Take any of three different outcomes: -+ // 1. found item => increment count and goto done -+ // 2. found null => keep looking for case 1, maybe allocate this cell -+ // 3. found something else => keep looking for cases 1 and 2 -+ // Case 3 is handled by a recursive call. -+ for (int row = start_row; row <= last_row; row++) { -+ Label next_test; -+ bool test_for_null_also = (row == start_row); -+ -+ // See if the item is item[n]. 
-+ int item_offset = in_bytes(item_offset_fn(row)); -+ test_mdp_data_at(mdp, item_offset, item, -+ (test_for_null_also ? reg2 : noreg), -+ next_test); -+ // (Reg2 now contains the item from the CallData.) -+ -+ // The item is item[n]. Increment count[n]. -+ int count_offset = in_bytes(item_count_offset_fn(row)); -+ increment_mdp_data_at(mdp, count_offset); -+ j(done); -+ bind(next_test); -+ -+ if (test_for_null_also) { -+ Label found_null; -+ // Failed the equality check on item[n]... Test for null. -+ if (start_row == last_row) { -+ // The only thing left to do is handle the null case. -+ if (non_profiled_offset >= 0) { -+ beqz(reg2, found_null); -+ // Item did not match any saved item and there is no empty row for it. -+ // Increment total counter to indicate polymorphic case. -+ increment_mdp_data_at(mdp, non_profiled_offset); -+ j(done); -+ bind(found_null); -+ } else { -+ bnez(reg2, done); -+ } -+ break; -+ } -+ // Since null is rare, make it be the branch-taken case. -+ beqz(reg2, found_null); -+ -+ // Put all the "Case 3" tests here. -+ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, -+ item_offset_fn, item_count_offset_fn, non_profiled_offset); -+ -+ // Found a null. Keep searching for a matching item, -+ // but remember that this is an empty (unused) slot. -+ bind(found_null); -+ } -+ } -+ -+ // In the fall-through case, we found no matching item, but we -+ // observed the item[start_row] is NULL. -+ // Fill in the item field and increment the count. -+ int item_offset = in_bytes(item_offset_fn(start_row)); -+ set_mdp_data_at(mdp, item_offset, item); -+ int count_offset = in_bytes(item_count_offset_fn(start_row)); -+ mv(reg2, DataLayout::counter_increment); -+ set_mdp_data_at(mdp, count_offset, reg2); -+ if (start_row > 0) { -+ j(done); -+ } ++void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { ++ andr(Rd, Rs1, Rs2); ++ // addw: The result is clipped to 32 bits, then the sign bit is extended, ++ // and the result is stored in Rd ++ addw(Rd, Rd, zr); +} + -+// Example state machine code for three profile rows: -+// # main copy of decision tree, rooted at row[1] -+// if (row[0].rec == rec) then [ -+// row[0].incr() -+// goto done -+// ] -+// if (row[0].rec != NULL) then [ -+// # inner copy of decision tree, rooted at row[1] -+// if (row[1].rec == rec) then [ -+// row[1].incr() -+// goto done -+// ] -+// if (row[1].rec != NULL) then [ -+// # degenerate decision tree, rooted at row[2] -+// if (row[2].rec == rec) then [ -+// row[2].incr() -+// goto done -+// ] -+// if (row[2].rec != NULL) then [ -+// count.incr() -+// goto done -+// ] # overflow -+// row[2].init(rec) -+// goto done -+// ] else [ -+// # remember row[1] is empty -+// if (row[2].rec == rec) then [ -+// row[2].incr() -+// goto done -+// ] -+// row[1].init(rec) -+// goto done -+// ] -+// else [ -+// # remember row[0] is empty -+// if (row[1].rec == rec) then [ -+// row[1].incr() -+// goto done -+// ] -+// if (row[2].rec == rec) then [ -+// row[2].incr() -+// goto done -+// ] -+// row[0].init(rec) -+// goto done -+// ] -+// done: -+ -+void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, -+ Register mdp, Register reg2, -+ bool is_virtual_call) { -+ assert(ProfileInterpreter, "must be profiling"); -+ Label done; -+ -+ record_klass_in_profile_helper(receiver, mdp, reg2, done, is_virtual_call); -+ -+ bind(done); ++void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { ++ orr(Rd, Rs1, Rs2); ++ // addw: The result is clipped to 32 bits, then the 
sign bit is extended, ++ // and the result is stored in Rd ++ addw(Rd, Rd, zr); +} + -+void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // Update the total ret count. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); -+ -+ for (uint row = 0; row < RetData::row_limit(); row++) { -+ Label next_test; ++void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { ++ xorr(Rd, Rs1, Rs2); ++ // addw: The result is clipped to 32 bits, then the sign bit is extended, ++ // and the result is stored in Rd ++ addw(Rd, Rd, zr); ++} + -+ // See if return_bci is equal to bci[n]: -+ test_mdp_data_at(mdp, -+ in_bytes(RetData::bci_offset(row)), -+ return_bci, noreg, -+ next_test); ++// Note: load_unsigned_short used to be called load_unsigned_word. ++int MacroAssembler::load_unsigned_short(Register dst, Address src) { ++ int off = offset(); ++ lhu(dst, src); ++ return off; ++} + -+ // return_bci is equal to bci[n]. Increment the count. -+ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++int MacroAssembler::load_unsigned_byte(Register dst, Address src) { ++ int off = offset(); ++ lbu(dst, src); ++ return off; ++} + -+ // The method data pointer needs to be updated to reflect the new target. -+ update_mdp_by_offset(mdp, -+ in_bytes(RetData::bci_displacement_offset(row))); -+ j(profile_continue); -+ bind(next_test); -+ } ++int MacroAssembler::load_signed_short(Register dst, Address src) { ++ int off = offset(); ++ lh(dst, src); ++ return off; ++} + -+ update_mdp_for_ret(return_bci); ++int MacroAssembler::load_signed_byte(Register dst, Address src) { ++ int off = offset(); ++ lb(dst, src); ++ return off; ++} + -+ bind(profile_continue); ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld(dst, src); break; ++ case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; ++ case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; ++ case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; ++ default: ShouldNotReachHere(); + } +} + -+void InterpreterMacroAssembler::profile_null_seen(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); -+ -+ // The method data pointer needs to be updated. -+ int mdp_delta = in_bytes(BitData::bit_data_size()); -+ if (TypeProfileCasts) { -+ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); -+ } -+ update_mdp_by_constant(mdp, mdp_delta); -+ -+ bind(profile_continue); ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: sd(src, dst); break; ++ case 4: sw(src, dst); break; ++ case 2: sh(src, dst); break; ++ case 1: sb(src, dst); break; ++ default: ShouldNotReachHere(); + } +} + -+void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { -+ if (ProfileInterpreter && TypeProfileCasts) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. 
-+ test_method_data_pointer(mdp, profile_continue); -+ -+ int count_offset = in_bytes(CounterData::count_offset()); -+ // Back up the address, since we have already bumped the mdp. -+ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); -+ -+ // *Decrement* the counter. We expect to see zero or small negatives. -+ increment_mdp_data_at(mdp, count_offset, true); -+ -+ bind (profile_continue); ++// reverse bytes in halfword in lower 16 bits and sign-extend ++// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) ++void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ srai(Rd, Rd, 48); ++ return; + } ++ assert_different_registers(Rs, tmp); ++ assert_different_registers(Rd, tmp); ++ srli(tmp, Rs, 8); ++ andi(tmp, tmp, 0xFF); ++ slli(Rd, Rs, 56); ++ srai(Rd, Rd, 48); // sign-extend ++ orr(Rd, Rd, tmp); +} + -+void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // The method data pointer needs to be updated. -+ int mdp_delta = in_bytes(BitData::bit_data_size()); -+ if (TypeProfileCasts) { -+ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); -+ -+ // Record the object type. -+ record_klass_in_profile(klass, mdp, reg2, false); -+ } -+ update_mdp_by_constant(mdp, mdp_delta); -+ -+ bind(profile_continue); ++// reverse bytes in lower word and sign-extend ++// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) ++void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ srai(Rd, Rd, 32); ++ return; + } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ revb_h_w_u(Rd, Rs, tmp1, tmp2); ++ slli(tmp2, Rd, 48); ++ srai(tmp2, tmp2, 32); // sign-extend ++ srli(Rd, Rd, 16); ++ orr(Rd, Rd, tmp2); +} + -+void InterpreterMacroAssembler::profile_switch_default(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // Update the default case count -+ increment_mdp_data_at(mdp, -+ in_bytes(MultiBranchData::default_count_offset())); -+ -+ // The method data pointer needs to be updated. 
-+ update_mdp_by_offset(mdp, -+ in_bytes(MultiBranchData:: -+ default_displacement_offset())); ++// reverse bytes in halfword in lower 16 bits and zero-extend ++// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) ++void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ srli(Rd, Rd, 48); ++ return; ++ } ++ assert_different_registers(Rs, tmp); ++ assert_different_registers(Rd, tmp); ++ srli(tmp, Rs, 8); ++ andi(tmp, tmp, 0xFF); ++ andi(Rd, Rs, 0xFF); ++ slli(Rd, Rd, 8); ++ orr(Rd, Rd, tmp); ++} + -+ bind(profile_continue); ++// reverse bytes in halfwords in lower 32 bits and zero-extend ++// Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) ++void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ rori(Rd, Rd, 32); ++ roriw(Rd, Rd, 16); ++ zero_extend(Rd, Rd, 32); ++ return; + } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ srli(tmp2, Rs, 16); ++ revb_h_h_u(tmp2, tmp2, tmp1); ++ revb_h_h_u(Rd, Rs, tmp1); ++ slli(tmp2, tmp2, 16); ++ orr(Rd, Rd, tmp2); +} + -+void InterpreterMacroAssembler::profile_switch_case(Register index, -+ Register mdp, -+ Register reg2) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // Build the base (index * per_case_size_in_bytes()) + -+ // case_array_offset_in_bytes() -+ mvw(reg2, in_bytes(MultiBranchData::per_case_size())); -+ mvw(t0, in_bytes(MultiBranchData::case_array_offset())); -+ Assembler::mul(index, index, reg2); -+ Assembler::add(index, index, t0); -+ -+ // Update the case count -+ increment_mdp_data_at(mdp, -+ index, -+ in_bytes(MultiBranchData::relative_count_offset())); -+ -+ // The method data pointer need to be updated. -+ update_mdp_by_offset(mdp, -+ index, -+ in_bytes(MultiBranchData:: -+ relative_displacement_offset())); ++// This method is only used for revb_h ++// Rd = Rs[47:0] Rs[55:48] Rs[63:56] ++void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1); ++ srli(tmp1, Rs, 48); ++ andi(tmp2, tmp1, 0xFF); ++ slli(tmp2, tmp2, 8); ++ srli(tmp1, tmp1, 8); ++ orr(tmp1, tmp1, tmp2); ++ slli(Rd, Rs, 16); ++ orr(Rd, Rd, tmp1); ++} + -+ bind(profile_continue); ++// reverse bytes in each halfword ++// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] ++void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ assert_different_registers(Rs, tmp1); ++ assert_different_registers(Rd, tmp1); ++ rev8(Rd, Rs); ++ zero_extend(tmp1, Rd, 32); ++ roriw(tmp1, tmp1, 16); ++ slli(tmp1, tmp1, 32); ++ srli(Rd, Rd, 32); ++ roriw(Rd, Rd, 16); ++ zero_extend(Rd, Rd, 32); ++ orr(Rd, Rd, tmp1); ++ return; ++ } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ revb_h_helper(Rd, Rs, tmp1, tmp2); ++ for (int i = 0; i < 3; ++i) { ++ revb_h_helper(Rd, Rd, tmp1, tmp2); + } +} + -+void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } -+ -+void InterpreterMacroAssembler::notify_method_entry() { -+ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to -+ // track stack depth. 
If it is possible to enter interp_only_mode we add -+ // the code to check if the event should be sent. -+ if (JvmtiExport::can_post_interpreter_events()) { -+ Label L; -+ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); -+ beqz(x13, L); -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::post_method_entry)); -+ bind(L); ++// reverse bytes in each word ++// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] ++void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ rori(Rd, Rd, 32); ++ return; + } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ revb(Rd, Rs, tmp1, tmp2); ++ ror_imm(Rd, Rd, 32); ++} + -+ { -+ SkipIfEqual skip(this, &DTraceMethodProbes, false); -+ get_method(c_rarg1); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), -+ xthread, c_rarg1); ++// reverse bytes in doubleword ++// Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] ++void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ return; + } -+ -+ // RedefineClasses() tracing support for obsolete method entry -+ if (log_is_enabled(Trace, redefine, class, obsolete)) { -+ get_method(c_rarg1); -+ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), -+ xthread, c_rarg1); ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ andi(tmp1, Rs, 0xFF); ++ slli(tmp1, tmp1, 8); ++ for (int step = 8; step < 56; step += 8) { ++ srli(tmp2, Rs, step); ++ andi(tmp2, tmp2, 0xFF); ++ orr(tmp1, tmp1, tmp2); ++ slli(tmp1, tmp1, 8); + } ++ srli(Rd, Rs, 56); ++ andi(Rd, Rd, 0xFF); ++ orr(Rd, tmp1, Rd); +} + ++// rotate right with shift bits ++void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) ++{ ++ if (UseZbb) { ++ rori(dst, src, shift); ++ return; ++ } + -+void InterpreterMacroAssembler::notify_method_exit( -+ TosState state, NotifyMethodExitMode mode) { -+ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to -+ // track stack depth. If it is possible to enter interp_only_mode we add -+ // the code to check if the event should be sent. -+ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { -+ Label L; -+ // Note: frame::interpreter_frame_result has a dependency on how the -+ // method result is saved across the call to post_method_exit. If this -+ // is changed then the interpreter_frame_result implementation will -+ // need to be updated too. ++ assert_different_registers(dst, tmp); ++ assert_different_registers(src, tmp); ++ assert(shift < 64, "shift amount must be < 64"); ++ slli(tmp, src, 64 - shift); ++ srli(dst, src, shift); ++ orr(dst, dst, tmp); ++} + -+ // template interpreter will leave the result on the top of the stack. 
-+ push(state); -+ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); -+ beqz(x13, L); -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); -+ bind(L); -+ pop(state); ++void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { ++ if (is_imm_in_range(imm, 12, 0)) { ++ and_imm12(Rd, Rn, imm); ++ } else { ++ assert_different_registers(Rn, tmp); ++ li(tmp, imm); ++ andr(Rd, Rn, tmp); + } ++} + -+ { -+ SkipIfEqual skip(this, &DTraceMethodProbes, false); -+ push(state); -+ get_method(c_rarg1); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), -+ xthread, c_rarg1); -+ pop(state); ++void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { ++ ld(tmp1, adr); ++ if (src.is_register()) { ++ orr(tmp1, tmp1, src.as_register()); ++ } else { ++ if (is_imm_in_range(src.as_constant(), 12, 0)) { ++ ori(tmp1, tmp1, src.as_constant()); ++ } else { ++ assert_different_registers(tmp1, tmp2); ++ li(tmp2, src.as_constant()); ++ orr(tmp1, tmp1, tmp2); ++ } + } ++ sd(tmp1, adr); +} + -+ -+// Jump if ((*counter_addr += increment) & mask) satisfies the condition. -+void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, -+ int increment, Address mask, -+ Register tmp1, Register tmp2, -+ bool preloaded, Label* where) { -+ Label done; -+ if (!preloaded) { -+ lwu(tmp1, counter_addr); ++void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { ++ if (UseCompressedClassPointers) { ++ lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); ++ if (Universe::narrow_klass_base() == NULL) { ++ slli(tmp, tmp, Universe::narrow_klass_shift()); ++ beq(trial_klass, tmp, L); ++ return; ++ } ++ decode_klass_not_null(tmp); ++ } else { ++ ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); + } -+ add(tmp1, tmp1, increment); -+ sw(tmp1, counter_addr); -+ lwu(tmp2, mask); -+ andr(tmp1, tmp1, tmp2); -+ bnez(tmp1, done); -+ j(*where); // offset is too large so we have to use j instead of beqz here -+ bind(done); ++ beq(trial_klass, tmp, L); +} + -+void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, -+ int number_of_arguments) { -+ // interpreter specific -+ // -+ // Note: No need to save/restore rbcp & rlocals pointer since these -+ // are callee saved registers and no blocking/ GC can happen -+ // in leaf calls. ++// Move an oop into a register. immediate is true if we want ++// immediate instructions, i.e. we are not going to patch this ++// instruction while the code is being executed by another thread. In ++// that case we can use move immediates rather than the constant pool. 
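++// A patchable (non-immediate) oop is instead loaded from a nearby constant slot via ld_constant, see below.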
++void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { ++ int oop_index; ++ if (obj == NULL) { ++ oop_index = oop_recorder()->allocate_oop_index(obj); ++ } else { +#ifdef ASSERT -+ { -+ Label L; -+ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ beqz(t0, L); -+ stop("InterpreterMacroAssembler::call_VM_leaf_base:" -+ " last_sp != NULL"); -+ bind(L); ++ { ++ ThreadInVMfromUnknown tiv; ++ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); ++ } ++#endif ++ oop_index = oop_recorder()->find_index(obj); + } -+#endif /* ASSERT */ -+ // super call -+ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ if (!immediate) { ++ address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address ++ ld_constant(dst, Address(dummy, rspec)); ++ } else ++ mv(dst, Address((address)obj, rspec)); +} + -+void InterpreterMacroAssembler::call_VM_base(Register oop_result, -+ Register java_thread, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments, -+ bool check_exceptions) { -+ // interpreter specific -+ // -+ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't -+ // really make a difference for these runtime calls, since they are -+ // slow anyway. Btw., bcp must be saved/restored since it may change -+ // due to GC. -+ save_bcp(); -+#ifdef ASSERT -+ { -+ Label L; -+ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ beqz(t0, L); -+ stop("InterpreterMacroAssembler::call_VM_base:" -+ " last_sp != NULL"); -+ bind(L); ++// Move a metadata address into a register. ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj == NULL) { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } else { ++ oop_index = oop_recorder()->find_index(obj); + } -+#endif /* ASSERT */ -+ // super call -+ MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp, -+ entry_point, number_of_arguments, -+ check_exceptions); -+// interpreter specific -+ restore_bcp(); -+ restore_locals(); ++ RelocationHolder rspec = metadata_Relocation::spec(oop_index); ++ mv(dst, Address((address)obj, rspec)); +} + -+void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) { -+ assert_different_registers(obj, tmp, t0, mdo_addr.base()); -+ Label update, next, none; ++// Writes to stack successive pages until offset reached to check for ++// stack overflow + shadow pages. This clobbers tmp. ++void MacroAssembler::bang_stack_size(Register size, Register tmp) { ++ assert_different_registers(tmp, size, t0); ++ // Bang stack for total size given plus shadow page size. ++ // Bang one page at a time because large size can bang beyond yellow and ++ // red zones. ++ mv(t0, os::vm_page_size()); ++ Label loop; ++ bind(loop); ++ sub(tmp, sp, t0); ++ subw(size, size, t0); ++ sd(size, Address(tmp)); ++ bgtz(size, loop); + -+ verify_oop(obj); ++ // Bang down shadow pages too. ++ // At this point, (tmp-0) is the last address touched, so don't ++ // touch it again. (It was touched as (tmp-pagesize) but then tmp ++ // was post-decremented.) Skip this address by starting at i=1, and ++ // touch a few more pages below. N.B. It is important to touch all ++ // the way down to and including i=StackShadowPages. 
++ for (int i = 0; i < (int)(JavaThread::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { ++ // this could be any sized move but this is can be a debugging crumb ++ // so the bigger the better. ++ sub(tmp, tmp, os::vm_page_size()); ++ sd(size, Address(tmp, 0)); ++ } ++} + -+ bnez(obj, update); -+ orptr(mdo_addr, TypeEntries::null_seen, t0, tmp); -+ j(next); ++SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { ++ assert_cond(masm != NULL); ++ int32_t offset = 0; ++ _masm = masm; ++ _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset); ++ _masm->lbu(t0, Address(t0, offset)); ++ _masm->beqz(t0, _label); ++} + -+ bind(update); -+ load_klass(obj, obj); ++SkipIfEqual::~SkipIfEqual() { ++ assert_cond(_masm != NULL); ++ _masm->bind(_label); ++ _masm = NULL; ++} + -+ ld(t0, mdo_addr); -+ xorr(obj, obj, t0); -+ andi(t0, obj, TypeEntries::type_klass_mask); -+ beqz(t0, next); // klass seen before, nothing to -+ // do. The unknown bit may have been -+ // set already but no need to check. ++void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) { ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld(dst, Address(xmethod, Method::const_offset())); ++ ld(dst, Address(dst, ConstMethod::constants_offset())); ++ ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes())); ++ ld(dst, Address(dst, mirror_offset)); ++ resolve_oop_handle(dst, tmp); ++} + -+ andi(t0, obj, TypeEntries::type_unknown); -+ bnez(t0, next); -+ // already unknown. Nothing to do anymore. ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. ++ assert_different_registers(result, tmp); ++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); ++} + -+ ld(t0, mdo_addr); -+ beqz(t0, none); -+ li(tmp, (u1)TypeEntries::null_seen); -+ beq(t0, tmp, none); -+ // There is a chance that the checks above (re-reading profiling -+ // data from memory) fail if another thread has just set the -+ // profiling to this obj's klass -+ ld(t0, mdo_addr); -+ xorr(obj, obj, t0); -+ andi(t0, obj, TypeEntries::type_klass_mask); -+ beqz(t0, next); ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, ++ Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} + -+ // different than before. Cannot keep accurate profile. -+ orptr(mdo_addr, TypeEntries::type_unknown, t0, tmp); -+ j(next); ++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any registers ++ // NOTE: this is plenty to provoke a segv ++ ld(zr, Address(reg, 0)); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} + -+ bind(none); -+ // first time here. Set profile type. 
-+ sd(obj, mdo_addr); ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, ++ Address dst, Register src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} + -+ bind(next); ++// Algorithm must match CompressedOops::encode. ++void MacroAssembler::encode_heap_oop(Register d, Register s) { ++ verify_oop(s, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli(d, s, LogMinObjAlignmentInBytes); ++ } else { ++ mv(d, s); ++ } ++ } else { ++ Label notNull; ++ sub(d, s, xheapbase); ++ bgez(d, notNull); ++ mv(d, zr); ++ bind(notNull); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli(d, d, Universe::narrow_oop_shift()); ++ } ++ } +} + -+void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { -+ if (!ProfileInterpreter) { -+ return; ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else { ++ ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); + } ++} + -+ if (MethodData::profile_arguments() || MethodData::profile_return()) { -+ Label profile_continue; ++void MacroAssembler::store_klass(Register dst, Register src) { ++ // FIXME: Should this be a store release? concurrent gcs assumes ++ // klass length is valid if klass field is not null. ++ if (UseCompressedClassPointers) { ++ encode_klass_not_null(src); ++ sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); ++ } else { ++ sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); ++ } ++} + -+ test_method_data_pointer(mdp, profile_continue); ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ // Store to klass gap in destination ++ sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); ++ } ++} + -+ int off_to_start = is_virtual ? 
in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++void MacroAssembler::decode_klass_not_null(Register r) { ++ decode_klass_not_null(r, r); ++} + -+ lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); -+ if (is_virtual) { -+ li(tmp, (u1)DataLayout::virtual_call_type_data_tag); -+ bne(t0, tmp, profile_continue); ++void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (Universe::narrow_klass_base() == NULL) { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ slli(dst, src, LogKlassAlignmentInBytes); + } else { -+ li(tmp, (u1)DataLayout::call_type_data_tag); -+ bne(t0, tmp, profile_continue); ++ mv(dst, src); + } ++ return; ++ } + -+ // calculate slot step -+ static int stack_slot_offset0 = in_bytes(TypeEntriesAtCall::stack_slot_offset(0)); -+ static int slot_step = in_bytes(TypeEntriesAtCall::stack_slot_offset(1)) - stack_slot_offset0; ++ Register xbase = dst; ++ if (dst == src) { ++ xbase = tmp; ++ } + -+ // calculate type step -+ static int argument_type_offset0 = in_bytes(TypeEntriesAtCall::argument_type_offset(0)); -+ static int type_step = in_bytes(TypeEntriesAtCall::argument_type_offset(1)) - argument_type_offset0; ++ assert_different_registers(src, xbase); ++ li(xbase, (uintptr_t)Universe::narrow_klass_base()); + -+ if (MethodData::profile_arguments()) { -+ Label done, loop, loopEnd, profileArgument, profileReturnType; -+ RegSet pushed_registers; -+ pushed_registers += x15; -+ pushed_registers += x16; -+ pushed_registers += x17; -+ Register mdo_addr = x15; -+ Register index = x16; -+ Register off_to_args = x17; -+ push_reg(pushed_registers, sp); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert_different_registers(t0, xbase); ++ shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); ++ } else { ++ add(dst, xbase, src); ++ } + -+ mv(off_to_args, in_bytes(TypeEntriesAtCall::args_data_offset())); -+ mv(t0, TypeProfileArgsLimit); -+ beqz(t0, loopEnd); ++ if (xbase == xheapbase) { reinit_heapbase(); } ++} + -+ mv(index, zr); // index < TypeProfileArgsLimit -+ bind(loop); -+ bgtz(index, profileReturnType); -+ li(t0, (int)MethodData::profile_return()); -+ beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false -+ bind(profileReturnType); -+ // If return value type is profiled we may have no argument to profile -+ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); -+ mv(t1, - TypeStackSlotEntries::per_arg_count()); -+ mul(t1, index, t1); -+ add(tmp, tmp, t1); -+ li(t1, TypeStackSlotEntries::per_arg_count()); -+ add(t0, mdp, off_to_args); -+ blt(tmp, t1, done); -+ -+ bind(profileArgument); -+ -+ ld(tmp, Address(callee, Method::const_offset())); -+ load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); -+ // stack offset o (zero based) from the start of the argument -+ // list, for n arguments translates into offset n - o - 1 from -+ // the end of the argument list -+ li(t0, stack_slot_offset0); -+ li(t1, slot_step); -+ mul(t1, index, t1); -+ add(t0, t0, t1); -+ add(t0, mdp, t0); -+ ld(t0, Address(t0)); -+ sub(tmp, tmp, t0); -+ addi(tmp, tmp, -1); -+ Address arg_addr = argument_address(tmp); -+ ld(tmp, arg_addr); -+ -+ li(t0, 
argument_type_offset0); -+ li(t1, type_step); -+ mul(t1, index, t1); -+ add(t0, t0, t1); -+ add(mdo_addr, mdp, t0); -+ Address mdo_arg_addr(mdo_addr, 0); -+ profile_obj_type(tmp, mdo_arg_addr, t1); -+ -+ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); -+ addi(off_to_args, off_to_args, to_add); -+ -+ // increment index by 1 -+ addi(index, index, 1); -+ li(t1, TypeProfileArgsLimit); -+ blt(index, t1, loop); -+ bind(loopEnd); -+ -+ if (MethodData::profile_return()) { -+ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); -+ addi(tmp, tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); -+ } -+ -+ add(t0, mdp, off_to_args); -+ bind(done); -+ mv(mdp, t0); ++void MacroAssembler::encode_klass_not_null(Register r) { ++ encode_klass_not_null(r, r); ++} + -+ // unspill the clobbered registers -+ pop_reg(pushed_registers, sp); ++void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); + -+ if (MethodData::profile_return()) { -+ // We're right after the type profile for the last -+ // argument. tmp is the number of cells left in the -+ // CallTypeData/VirtualCallTypeData to reach its end. Non null -+ // if there's a return to profile. -+ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); -+ shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size)); -+ } -+ sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ if (Universe::narrow_klass_base() == NULL) { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ srli(dst, src, LogKlassAlignmentInBytes); + } else { -+ assert(MethodData::profile_return(), "either profile call args or call ret"); -+ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ mv(dst, src); + } ++ return; ++ } + -+ // mdp points right after the end of the -+ // CallTypeData/VirtualCallTypeData, right after the cells for the -+ // return value type if there's one -+ -+ bind(profile_continue); ++ if (((uint64_t)(uintptr_t)Universe::narrow_klass_base() & 0xffffffff) == 0 && ++ Universe::narrow_klass_shift() == 0) { ++ zero_extend(dst, src, 32); ++ return; + } -+} + -+void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { -+ assert_different_registers(mdp, ret, tmp, xbcp, t0, t1); -+ if (ProfileInterpreter && MethodData::profile_return()) { -+ Label profile_continue, done; ++ Register xbase = dst; ++ if (dst == src) { ++ xbase = tmp; ++ } + -+ test_method_data_pointer(mdp, profile_continue); ++ assert_different_registers(src, xbase); ++ li(xbase, (intptr_t)Universe::narrow_klass_base()); ++ sub(dst, src, xbase); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ srli(dst, dst, LogKlassAlignmentInBytes); ++ } ++ if (xbase == xheapbase) { ++ reinit_heapbase(); ++ } ++} + -+ if (MethodData::profile_return_jsr292_only()) { -+ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ decode_heap_oop_not_null(r, r); ++} + -+ // If we don't profile all invoke bytecodes we must make sure -+ // it's a bytecode we indeed profile. 
We can't go back to the -+ // begining of the ProfileData we intend to update to check its -+ // type because we're right after it and we don't known its -+ // length -+ Label do_profile; -+ lbu(t0, Address(xbcp, 0)); -+ li(tmp, (u1)Bytecodes::_invokedynamic); -+ beq(t0, tmp, do_profile); -+ li(tmp, (u1)Bytecodes::_invokehandle); -+ beq(t0, tmp, do_profile); -+ get_method(tmp); -+ lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); -+ li(t1, static_cast(vmIntrinsics::_compiledLambdaForm)); -+ bne(t0, t1, profile_continue); -+ bind(do_profile); ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ slli(dst, src, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ add(dst, xheapbase, dst); + } -+ -+ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); -+ mv(tmp, ret); -+ profile_obj_type(tmp, mdo_ret_addr, t1); -+ -+ bind(profile_continue); ++ } else { ++ assert(Universe::narrow_oop_base() == NULL, "sanity"); ++ mv(dst, src); + } +} + -+void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3) { -+ assert_different_registers(t0, t1, mdp, tmp1, tmp2, tmp3); -+ if (ProfileInterpreter && MethodData::profile_parameters()) { -+ Label profile_continue, done; -+ -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // Load the offset of the area within the MDO used for -+ // parameters. If it's negative we're not profiling any parameters -+ lwu(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()))); -+ srli(tmp2, tmp1, 31); -+ bnez(tmp2, profile_continue); // i.e. sign bit set -+ -+ // Compute a pointer to the area for parameters from the offset -+ // and move the pointer to the slot for the last -+ // parameters. Collect profiling from last parameter down. 
-+ // mdo start + parameters offset + array length - 1 -+ add(mdp, mdp, tmp1); -+ ld(tmp1, Address(mdp, ArrayData::array_len_offset())); -+ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); ++void MacroAssembler::decode_heap_oop(Register d, Register s) { ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0 || d != s) { ++ slli(d, s, Universe::narrow_oop_shift()); ++ } ++ } else { ++ Label done; ++ mv(d, s); ++ beqz(s, done); ++ shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); ++ bind(done); ++ } ++ verify_oop(d, "broken oop in decode_heap_oop"); ++} + -+ Label loop; -+ bind(loop); ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} + -+ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); -+ int type_base = in_bytes(ParametersTypeData::type_offset(0)); -+ int per_arg_scale = exact_log2(DataLayout::cell_size); -+ add(t0, mdp, off_base); -+ add(t1, mdp, type_base); ++void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} + -+ shadd(tmp2, tmp1, t0, tmp2, per_arg_scale); -+ // load offset on the stack from the slot for this parameter -+ ld(tmp2, Address(tmp2, 0)); -+ neg(tmp2, tmp2); ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, thread_tmp); ++} + -+ // read the parameter from the local area -+ shadd(tmp2, tmp2, xlocals, tmp2, Interpreter::logStackElementSize); -+ ld(tmp2, Address(tmp2, 0)); ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} + -+ // profile the parameter -+ shadd(t1, tmp1, t1, t0, per_arg_scale); -+ Address arg_type(t1, 0); -+ profile_obj_type(tmp2, arg_type, tmp3); ++int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, ++ bool want_remainder) ++{ ++ // Full implementation of Java idiv and irem. The function ++ // returns the (pc) offset of the div instruction - may be needed ++ // for implicit exceptions. ++ // ++ // input : rs1: dividend ++ // rs2: divisor ++ // ++ // result: either ++ // quotient (= rs1 idiv rs2) ++ // remainder (= rs1 irem rs2) + -+ // go to next parameter -+ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); -+ bgez(tmp1, loop); + -+ bind(profile_continue); ++ int idivl_offset = offset(); ++ if (!want_remainder) { ++ divw(result, rs1, rs2); ++ } else { ++ remw(result, rs1, rs2); // result = rs1 % rs2; + } ++ return idivl_offset; +} + -+void InterpreterMacroAssembler::get_method_counters(Register method, -+ Register mcs, Label& skip) { -+ Label has_counters; -+ ld(mcs, Address(method, Method::method_counters_offset())); -+ bnez(mcs, has_counters); -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::build_method_counters), method); -+ ld(mcs, Address(method, Method::method_counters_offset())); -+ beqz(mcs, skip); // No MethodCounters allocated, OutOfMemory -+ bind(has_counters); -+} ++int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, ++ bool want_remainder) ++{ ++ // Full implementation of Java ldiv and lrem. 
The function ++ // returns the (pc) offset of the div instruction - may be needed ++ // for implicit exceptions. ++ // ++ // input : rs1: dividend ++ // rs2: divisor ++ // ++ // result: either ++ // quotient (= rs1 idiv rs2) ++ // remainder (= rs1 irem rs2) + -+#ifdef ASSERT -+void InterpreterMacroAssembler::verify_access_flags(Register access_flags, uint32_t flag_bits, -+ const char* msg, bool stop_by_hit) { -+ Label L; -+ andi(t0, access_flags, flag_bits); -+ if (stop_by_hit) { -+ beqz(t0, L); ++ int idivq_offset = offset(); ++ if (!want_remainder) { ++ div(result, rs1, rs2); + } else { -+ bnez(t0, L); ++ rem(result, rs1, rs2); // result = rs1 % rs2; + } -+ stop(msg); -+ bind(L); ++ return idivq_offset; +} + -+void InterpreterMacroAssembler::verify_frame_setup() { -+ Label L; -+ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ ld(t0, monitor_block_top); -+ beq(esp, t0, L); -+ stop("broken stack frame setup in interpreter"); -+ bind(L); -+} -+#endif -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -new file mode 100644 -index 00000000000..4d8cb086f82 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -@@ -0,0 +1,285 @@ -+/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++// Look up the method for a megamorpic invkkeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. 
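++// (Clarifying sketch, illustrative only: the target is selected by the interface
++// klass passed in intf_klass and the itable slot given by itable_index; the scan
++// below walks the receiver's itableOffsetEntry list, roughly
++//   for (entry = itable_start; entry->interface() != NULL; entry++)
++//     if (entry->interface() == intf_klass) goto found;
++// and branches to L_no_such_interface when a NULL entry is reached.)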
++void MacroAssembler::lookup_interface_method(Register recv_klass,
++                                             Register intf_klass,
++                                             RegisterOrConstant itable_index,
++                                             Register method_result,
++                                             Register scan_tmp,
++                                             Label& L_no_such_interface,
++                                             bool return_method) {
++  assert_different_registers(recv_klass, intf_klass, scan_tmp);
++  assert_different_registers(method_result, intf_klass, scan_tmp);
++  assert(recv_klass != method_result || !return_method,
++         "recv_klass can be destroyed when method isn't needed");
++  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
++         "caller must use same register for non-constant itable index as for method");
+
++  // Compute start of first itableOffsetEntry (which is at the end of the vtable).
++  int vtable_base = in_bytes(Klass::vtable_start_offset());
++  int itentry_off = itableMethodEntry::method_offset_in_bytes();
++  int scan_step = itableOffsetEntry::size() * wordSize;
++  int vte_size = vtableEntry::size_in_bytes();
++  assert(vte_size == wordSize, "else adjust times_vte_scale");
+
++  lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset()));
+
++  // %%% Could store the aligned, prescaled offset in the klassoop.
++  shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3);
++  add(scan_tmp, scan_tmp, vtable_base);
+
++  if (return_method) {
++    // Adjust recv_klass by scaled itable_index, so we can free itable_index.
++    assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
++    if (itable_index.is_register()) {
++      slli(t0, itable_index.as_register(), 3);
++    } else {
++      li(t0, itable_index.as_constant() << 3);
++    }
++    add(recv_klass, recv_klass, t0);
++    if (itentry_off) {
++      add(recv_klass, recv_klass, itentry_off);
++    }
++  }
+
++  Label search, found_method;
+
++  ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
++  beq(intf_klass, method_result, found_method);
++  bind(search);
++  // Check that the previous entry is non-null. A null entry means that
++  // the receiver class doesn't implement the interface, and wasn't the
++  // same as when the caller was compiled.
++  beqz(method_result, L_no_such_interface, /* is_far */ true);
++  addi(scan_tmp, scan_tmp, scan_step);
++  ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
++  bne(intf_klass, method_result, search);
+
++  bind(found_method);
+
++  // Got a hit.
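++  // (Illustrative note: when return_method is set, scan_tmp now points at the
++  // matching itableOffsetEntry; its offset field, added to the already-adjusted
++  // recv_klass, locates the Method* entry loaded below.)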
++  if (return_method) {
++    lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes()));
++    add(method_result, recv_klass, scan_tmp);
++    ld(method_result, Address(method_result));
++  }
++}
+
++// virtual method calling
++void MacroAssembler::lookup_virtual_method(Register recv_klass,
++                                           RegisterOrConstant vtable_index,
++                                           Register method_result) {
++  const int base = in_bytes(Klass::vtable_start_offset());
++  assert(vtableEntry::size() * wordSize == 8,
++         "adjust the scaling in the code below");
++  int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes();
+
++  if (vtable_index.is_register()) {
++    shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord);
++    ld(method_result, Address(method_result, vtable_offset_in_bytes));
++  } else {
++    vtable_offset_in_bytes += vtable_index.as_constant() * wordSize;
++    ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes));
++  }
++}
+
++void MacroAssembler::membar(uint32_t order_constraint) {
++  address prev = pc() - NativeMembar::instruction_size;
++  address last = code()->last_insn();
+
++  if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) {
++    NativeMembar *bar = NativeMembar_at(prev);
++    // We are merging two memory barrier instructions. On RISCV we
++    // can do this simply by ORing them together.
++    bar->set_kind(bar->get_kind() | order_constraint);
++    BLOCK_COMMENT("merged membar");
++  } else {
++    code()->set_last_insn(pc());
+
++    uint32_t predecessor = 0;
++    uint32_t successor = 0;
+
++    membar_mask_to_pred_succ(order_constraint, predecessor, successor);
++    fence(predecessor, successor);
+  }
++}
+
++// Form an address from base + offset in Rd. Rd may or may not
++// actually be used: you must use the Address that is returned. It
++// is up to you to ensure that the shift provided matches the size
++// of your data.
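++// (Illustrative usage: form_address(t1, x15, 8) can simply return Address(x15, 8)
++// because 8 fits in a signed 12-bit immediate, while form_address(t1, x15, 0x12345)
++// materializes x15 + 0x12345 into t1 and returns Address(t1).)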
++Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) { ++ if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12 ++ return Address(base, byte_offset); + } + -+ void restore_constant_pool_cache() { -+ ld(xcpool, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); -+ } ++ // Do it the hard way ++ mv(Rd, byte_offset); ++ add(Rd, base, Rd); ++ return Address(Rd); ++} + -+ void get_dispatch(); ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label& L_success) { ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} + -+ // Helpers for runtime call arguments/results -+ void get_method(Register reg) { -+ ld(reg, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++void MacroAssembler::safepoint_poll(Label& slow_path) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ andi(t0, t1, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path); ++ } else { ++ int32_t offset = 0; ++ la_patchable(t0, ExternalAddress(SafepointSynchronize::address_of_state()), offset); ++ lwu(t0, Address(t0, offset)); ++ assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); ++ bnez(t0, slow_path); + } ++} + -+ void get_const(Register reg) { -+ get_method(reg); -+ ld(reg, Address(reg, in_bytes(Method::const_offset()))); -+ } ++void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, ++ Label &succeed, Label *fail) { ++ // oldv holds comparison value ++ // newv holds value to write in exchange ++ // addr identifies memory word to compare against/update ++ Label retry_load, nope; ++ bind(retry_load); ++ // Load reserved from the memory location ++ lr_d(tmp, addr, Assembler::aqrl); ++ // Fail and exit if it is not what we expect ++ bne(tmp, oldv, nope); ++ // If the store conditional succeeds, tmp will be zero ++ sc_d(tmp, newv, addr, Assembler::rl); ++ beqz(tmp, succeed); ++ // Retry only when the store conditional failed ++ j(retry_load); + -+ void get_constant_pool(Register reg) { -+ get_const(reg); -+ ld(reg, Address(reg, in_bytes(ConstMethod::constants_offset()))); ++ bind(nope); ++ membar(AnyAny); ++ mv(oldv, tmp); ++ if (fail != NULL) { ++ j(*fail); + } ++} + -+ void get_constant_pool_cache(Register reg) { -+ get_constant_pool(reg); -+ ld(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); -+ } ++void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, ++ Label &succeed, Label *fail) { ++ assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); ++ cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); ++} + -+ void get_cpool_and_tags(Register cpool, Register tags) { -+ get_constant_pool(cpool); -+ ld(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); ++void MacroAssembler::load_reserved(Register addr, ++ enum operand_size size, ++ Assembler::Aqrl acquire) { ++ switch (size) { ++ case int64: ++ lr_d(t0, addr, acquire); ++ break; ++ case int32: ++ lr_w(t0, addr, acquire); ++ break; ++ case uint32: ++ lr_w(t0, addr, acquire); ++ zero_extend(t0, t0, 32); ++ break; ++ default: ++ ShouldNotReachHere(); + } ++} + -+ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); -+ void get_cache_and_index_at_bcp(Register cache, Register 
index, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_method_counters(Register method, Register mcs, Label& skip); ++void MacroAssembler::store_conditional(Register addr, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl release) { ++ switch (size) { ++ case int64: ++ sc_d(t0, new_val, addr, release); ++ break; ++ case int32: ++ case uint32: ++ sc_w(t0, new_val, addr, release); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} + -+ // Load cpool->resolved_references(index). -+ void load_resolved_reference_at_index(Register result, Register index, Register tmp = x15); + -+ // Load cpool->resolved_klass_at(index). -+ void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); ++void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Register tmp1, Register tmp2, Register tmp3) { ++ assert(size == int8 || size == int16, "unsupported operand size"); + -+ void load_resolved_method_at_index(int byte_no, Register method, Register cache); ++ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; + -+ void pop_ptr(Register r = x10); -+ void pop_i(Register r = x10); -+ void pop_l(Register r = x10); -+ void pop_f(FloatRegister r = f10); -+ void pop_d(FloatRegister r = f10); -+ void push_ptr(Register r = x10); -+ void push_i(Register r = x10); -+ void push_l(Register r = x10); -+ void push_f(FloatRegister r = f10); -+ void push_d(FloatRegister r = f10); ++ andi(shift, addr, 3); ++ slli(shift, shift, 3); + -+ void pop(TosState state); // transition vtos -> state -+ void push(TosState state); // transition state -> vtos ++ andi(aligned_addr, addr, ~3); + -+ void empty_expression_stack() { -+ ld(esp, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); -+ // NULL last_sp until next java call -+ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ if (size == int8) { ++ addi(mask, zr, 0xff); ++ } else { ++ // size == int16 case ++ addi(mask, zr, -1); ++ zero_extend(mask, mask, 16); + } ++ sll(mask, mask, shift); + -+ // Helpers for swap and dup -+ void load_ptr(int n, Register val); -+ void store_ptr(int n, Register val); -+ -+ // Load float value from 'address'. The value is loaded onto the FPU register v0. -+ void load_float(Address src); -+ void load_double(Address src); -+ -+ // Generate a subtype check: branch to ok_is_subtype if sub_klass is -+ // a subtype of super_klass. 
-+ void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); -+ -+ // Dispatching -+ void dispatch_prolog(TosState state, int step = 0); -+ void dispatch_epilog(TosState state, int step = 0); -+ // dispatch via t0 -+ void dispatch_only(TosState state, bool generate_poll = false, Register Rs = t0); -+ // dispatch normal table via t0 (assume t0 is loaded already) -+ void dispatch_only_normal(TosState state, Register Rs = t0); -+ void dispatch_only_noverify(TosState state, Register Rs = t0); -+ // load t0 from [xbcp + step] and dispatch via t0 -+ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); -+ // load t0 from [xbcp] and dispatch via t0 and table -+ void dispatch_via (TosState state, address* table); -+ -+ // jump to an invoked target -+ void prepare_to_jump_from_interpreted(); -+ void jump_from_interpreted(Register method); -+ ++ xori(not_mask, mask, -1); + -+ // Returning from interpreted functions -+ // -+ // Removes the current activation (incl. unlocking of monitors) -+ // and sets up the return address. This code is also used for -+ // exception unwindwing. In that case, we do not want to throw -+ // IllegalMonitorStateExceptions, since that might get us into an -+ // infinite rethrow exception loop. -+ // Additionally this code is used for popFrame and earlyReturn. -+ // In popFrame case we want to skip throwing an exception, -+ // installing an exception, and notifying jvmdi. -+ // In earlyReturn case we only want to skip throwing an exception -+ // and installing an exception. -+ void remove_activation(TosState state, -+ bool throw_monitor_exception = true, -+ bool install_monitor_exception = true, -+ bool notify_jvmdi = true); ++ sll(expected, expected, shift); ++ andr(expected, expected, mask); + -+ // FIXME: Give us a valid frame at a null check. -+ virtual void null_check(Register reg, int offset = -1) { -+ MacroAssembler::null_check(reg, offset); -+ } ++ sll(new_val, new_val, shift); ++ andr(new_val, new_val, mask); ++} + -+ // Object locking -+ void lock_object (Register lock_reg); -+ void unlock_object(Register lock_reg); ++// cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps. ++// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w, ++// which are forced to work with 4-byte aligned address. 
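++// (Sketch of the approach, for illustration: the word containing the byte/short is
++// loaded with lr.w from addr & ~3, the operand is isolated with a mask shifted by
++// (addr & 3) * 8 bit positions, compared against the pre-shifted 'expected', and the
++// merged word is written back with sc.w; see cmpxchg_narrow_value_helper above.)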
++void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool, ++ Register tmp1, Register tmp2, Register tmp3) { ++ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; ++ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); ++ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); + -+ // Interpreter profiling operations -+ void set_method_data_pointer_for_bcp(); -+ void test_method_data_pointer(Register mdp, Label& zero_continue); -+ void verify_method_data_pointer(); ++ Label retry, fail, done; + -+ void set_mdp_data_at(Register mdp_in, int constant, Register value); -+ void increment_mdp_data_at(Address data, bool decrement = false); -+ void increment_mdp_data_at(Register mdp_in, int constant, -+ bool decrement = false); -+ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, -+ bool decrement = false); -+ void increment_mask_and_jump(Address counter_addr, -+ int increment, Address mask, -+ Register tmp1, Register tmp2, -+ bool preloaded, Label* where); ++ bind(retry); ++ lr_w(old, aligned_addr, acquire); ++ andr(tmp, old, mask); ++ bne(tmp, expected, fail); + -+ void set_mdp_flag_at(Register mdp_in, int flag_constant); -+ void test_mdp_data_at(Register mdp_in, int offset, Register value, -+ Register test_value_out, -+ Label& not_equal_continue); ++ andr(tmp, old, not_mask); ++ orr(tmp, tmp, new_val); ++ sc_w(tmp, tmp, aligned_addr, release); ++ bnez(tmp, retry); + -+ void record_klass_in_profile(Register receiver, Register mdp, -+ Register reg2, bool is_virtual_call); -+ void record_klass_in_profile_helper(Register receiver, Register mdp, -+ Register reg2, -+ Label& done, bool is_virtual_call); -+ void record_item_in_profile_helper(Register item, Register mdp, -+ Register reg2, int start_row, Label& done, int total_rows, -+ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, -+ int non_profiled_offset); ++ if (result_as_bool) { ++ addi(result, zr, 1); ++ j(done); + -+ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); -+ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); -+ void update_mdp_by_constant(Register mdp_in, int constant); -+ void update_mdp_for_ret(Register return_bci); ++ bind(fail); ++ mv(result, zr); + -+ // narrow int return value -+ void narrow(Register result); ++ bind(done); ++ } else { ++ andr(tmp, old, mask); + -+ void profile_taken_branch(Register mdp, Register bumped_count); -+ void profile_not_taken_branch(Register mdp); -+ void profile_call(Register mdp); -+ void profile_final_call(Register mdp); -+ void profile_virtual_call(Register receiver, Register mdp, -+ Register t1, -+ bool receiver_can_be_null = false); -+ void profile_ret(Register return_bci, Register mdp); -+ void profile_null_seen(Register mdp); -+ void profile_typecheck(Register mdp, Register klass, Register temp); -+ void profile_typecheck_failed(Register mdp); -+ void profile_switch_default(Register mdp); -+ void profile_switch_case(Register index_in_scratch, Register mdp, -+ Register temp); ++ bind(fail); ++ srl(result, tmp, shift); + -+ void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp); -+ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); -+ void profile_return_type(Register mdp, Register ret, Register 
tmp); -+ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3); ++ if (size == int8) { ++ sign_extend(result, result, 8); ++ } else { ++ // size == int16 case ++ sign_extend(result, result, 16); ++ } ++ } ++} + -+ // Debugging -+ // only if +VerifyFPU && (state == ftos || state == dtos) -+ void verify_FPU(int stack_depth, TosState state = ftos); ++// weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement ++// the weak CAS stuff. The major difference is that it just failed when store conditional ++// failed. ++void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, ++ Register tmp1, Register tmp2, Register tmp3) { ++ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; ++ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); ++ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); + -+ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ Label succ, fail, done; + -+ // support for jvmti/dtrace -+ void notify_method_entry(); -+ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++ lr_w(old, aligned_addr, acquire); ++ andr(tmp, old, mask); ++ bne(tmp, expected, fail); + -+ virtual void _call_Unimplemented(address call_site) { -+ save_bcp(); -+ set_last_Java_frame(esp, fp, (address) pc(), t0); -+ MacroAssembler::_call_Unimplemented(call_site); -+ } ++ andr(tmp, old, not_mask); ++ orr(tmp, tmp, new_val); ++ sc_w(tmp, tmp, aligned_addr, release); ++ beqz(tmp, succ); + -+#ifdef ASSERT -+ void verify_access_flags(Register access_flags, uint32_t flag_bits, -+ const char* msg, bool stop_by_hit = true); -+ void verify_frame_setup(); -+#endif -+}; ++ bind(fail); ++ addi(result, zr, 1); ++ j(done); + -+#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -new file mode 100644 -index 00000000000..d93530d8564 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -@@ -0,0 +1,295 @@ -+/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ bind(succ); ++ mv(result, zr); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "memory/allocation.inline.hpp" -+#include "memory/universe.hpp" -+#include "oops/method.hpp" -+#include "oops/oop.inline.hpp" -+#include "runtime/handles.inline.hpp" -+#include "runtime/icache.hpp" -+#include "runtime/interfaceSupport.inline.hpp" -+#include "runtime/signature.hpp" ++ bind(done); ++} + -+#define __ _masm-> ++void MacroAssembler::cmpxchg(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool) { ++ assert(size != int8 && size != int16, "unsupported operand size"); + -+// Implementation of SignatureHandlerGenerator -+Register InterpreterRuntime::SignatureHandlerGenerator::from() { return xlocals; } -+Register InterpreterRuntime::SignatureHandlerGenerator::to() { return sp; } -+Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return t0; } ++ Label retry_load, done, ne_done; ++ bind(retry_load); ++ load_reserved(addr, size, acquire); ++ bne(t0, expected, ne_done); ++ store_conditional(addr, new_val, size, release); ++ bnez(t0, retry_load); + -+Register InterpreterRuntime::SignatureHandlerGenerator::next_gpr() { -+ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { -+ return g_INTArgReg[++_num_reg_int_args]; ++ // equal, succeed ++ if (result_as_bool) { ++ li(result, 1); ++ } else { ++ mv(result, expected); + } -+ return noreg; -+} ++ j(done); + -+FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() { -+ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { -+ return g_FPArgReg[_num_reg_fp_args++]; ++ // not equal, failed ++ bind(ne_done); ++ if (result_as_bool) { ++ mv(result, zr); + } else { -+ return fnoreg; ++ mv(result, t0); + } -+} + -+int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() { -+ int ret = _stack_offset; -+ _stack_offset += wordSize; -+ return ret; ++ bind(done); +} + -+InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( -+ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { -+ _masm = new MacroAssembler(buffer); // allocate on resourse area by default -+ _num_reg_int_args = (method->is_static() ? 
1 : 0); -+ _num_reg_fp_args = 0; -+ _stack_offset = 0; -+} ++void MacroAssembler::cmpxchg_weak(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result) { ++ Label fail, done, sc_done; ++ load_reserved(addr, size, acquire); ++ bne(t0, expected, fail); ++ store_conditional(addr, new_val, size, release); ++ beqz(t0, sc_done); + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); ++ // fail ++ bind(fail); ++ li(result, 1); ++ j(done); + -+ Register reg = next_gpr(); -+ if (reg != noreg) { -+ __ lw(reg, src); -+ } else { -+ __ lw(x10, src); -+ __ sw(x10, Address(to(), next_stack_offset())); -+ } ++ // sc_done ++ bind(sc_done); ++ mv(result, 0); ++ bind(done); +} + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++#define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ ++void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ ++ prev = prev->is_valid() ? prev : zr; \ ++ if (incr.is_register()) { \ ++ AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ ++ } else { \ ++ mv(t0, incr.as_constant()); \ ++ AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ ++ } \ ++ return; \ ++} + -+ Register reg = next_gpr(); -+ if (reg != noreg) { -+ __ ld(reg, src); -+ } else { -+ __ ld(x10, src); -+ __ sd(x10, Address(to(), next_stack_offset())); -+ } ++ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) ++ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) ++ ++#undef ATOMIC_OP ++ ++#define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ ++void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ ++ prev = prev->is_valid() ? 
prev : zr; \ ++ AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ ++ return; \ +} + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); ++ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) ++ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) + -+ FloatRegister reg = next_fpr(); -+ if (reg != fnoreg) { -+ __ flw(reg, src); -+ } else { -+ // a floating-point argument is passed according to the integer calling -+ // convention if no floating-point argument register available -+ pass_int(); -+ } ++#undef ATOMIC_XCHG ++ ++#define ATOMIC_XCHGU(OP1, OP2) \ ++void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ ++ atomic_##OP2(prev, newv, addr); \ ++ zero_extend(prev, prev, 32); \ ++ return; \ +} + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ATOMIC_XCHGU(xchgwu, xchgw) ++ATOMIC_XCHGU(xchgalwu, xchgalw) + -+ FloatRegister reg = next_fpr(); -+ if (reg != fnoreg) { -+ __ fld(reg, src); ++#undef ATOMIC_XCHGU ++ ++void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(entry.target()) != NULL, ++ "destination of far call not found in code cache"); ++ int32_t offset = 0; ++ if (far_branches()) { ++ // We can use auipc + jalr here because we know that the total size of ++ // the code cache cannot exceed 2Gb. ++ la_patchable(tmp, entry, offset); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ jalr(x0, tmp, offset); + } else { -+ // a floating-point argument is passed according to the integer calling -+ // convention if no floating-point argument register available -+ pass_long(); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ j(entry); + } +} + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { -+ Register reg = next_gpr(); -+ if (reg == c_rarg1) { -+ assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); -+ __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); -+ } else if (reg != noreg) { -+ // c_rarg2-c_rarg7 -+ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); -+ __ mv(reg, zr); //_num_reg_int_args:c_rarg -> 1:c_rarg2, 2:c_rarg3... -+ __ ld(temp(), x10); -+ Label L; -+ __ beqz(temp(), L); -+ __ mv(reg, x10); -+ __ bind(L); ++void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(entry.target()) != NULL, ++ "destination of far call not found in code cache"); ++ int32_t offset = 0; ++ if (far_branches()) { ++ // We can use auipc + jalr here because we know that the total size of ++ // the code cache cannot exceed 2Gb. 
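++    // (Illustrative note: auipc materializes the PC-relative upper 20 bits and
++    // jalr adds a signed 12-bit displacement, so the pair reaches roughly
++    // +/-2 GiB from the call site.)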
++ la_patchable(tmp, entry, offset); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ jalr(x1, tmp, offset); // link + } else { -+ //to stack -+ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); -+ __ ld(temp(), x10); -+ Label L; -+ __ bnez(temp(), L); -+ __ mv(x10, zr); -+ __ bind(L); -+ assert(sizeof(jobject) == wordSize, ""); -+ __ sd(x10, Address(to(), next_stack_offset())); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ jal(entry); // link + } +} + -+void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { -+ // generate code to handle arguments -+ iterate(fingerprint); -+ -+ // return result handler -+ __ la(x10, ExternalAddress(Interpreter::result_handler(method()->result_type()))); -+ __ ret(); ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ Register super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, tmp_reg); ++ bool must_load_sco = (super_check_offset == noreg); ++ if (must_load_sco) { ++ assert(tmp_reg != noreg, "supply either a temp or a register offset"); ++ } else { ++ assert_different_registers(sub_klass, super_klass, super_check_offset); ++ } + -+ __ flush(); -+} ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in batch"); + ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ Address super_check_offset_addr(super_klass, sco_offset); + -+// Implementation of SignatureHandlerLibrary ++ // Hacked jmp, which may only be used just before L_fallthrough. ++#define final_jmp(label) \ ++ if (&(label) == &L_fallthrough) { /*do nothing*/ } \ ++ else j(label) /*omit semi*/ + -+void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front fo the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); + ++ // Check the supertype display: ++ if (must_load_sco) { ++ lwu(tmp_reg, super_check_offset_addr); ++ super_check_offset = tmp_reg; ++ } ++ add(t0, sub_klass, super_check_offset); ++ Address super_check_addr(t0); ++ ld(t0, super_check_addr); // load displayed supertype + -+class SlowSignatureHandler -+ : public NativeSignatureIterator { -+ private: -+ address _from; -+ intptr_t* _to; -+ intptr_t* _int_args; -+ intptr_t* _fp_args; -+ intptr_t* _fp_identifiers; -+ unsigned int _num_reg_int_args; -+ unsigned int _num_reg_fp_args; ++ // Ths check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_Cache and the primary super dispaly elements. 
++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). + -+ intptr_t* single_slot_addr() { -+ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); -+ _from -= Interpreter::stackElementSize; -+ return from_addr; ++ beq(super_klass, t0, *L_success); ++ mv(t1, sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(super_check_offset, t1, *L_slow_path); ++ } else { ++ bne(super_check_offset, t1, *L_failure, /* is_far */ true); ++ final_jmp(*L_slow_path); + } + -+ intptr_t* double_slot_addr() { -+ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); -+ _from -= 2 * Interpreter::stackElementSize; -+ return from_addr; -+ } ++ bind(L_fallthrough); + -+ int pass_gpr(intptr_t value) { -+ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { -+ *_int_args++ = value; -+ return _num_reg_int_args++; -+ } -+ return -1; -+ } ++#undef final_jmp ++} + -+ int pass_fpr(intptr_t value) { -+ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { -+ *_fp_args++ = value; -+ return _num_reg_fp_args++; -+ } -+ return -1; -+ } ++// Scans count pointer sized words at [addr] for occurence of value, ++// generic ++void MacroAssembler::repne_scan(Register addr, Register value, Register count, ++ Register tmp) { ++ Label Lloop, Lexit; ++ beqz(count, Lexit); ++ bind(Lloop); ++ ld(tmp, addr); ++ beq(value, tmp, Lexit); ++ add(addr, addr, wordSize); ++ sub(count, count, 1); ++ bnez(count, Lloop); ++ bind(Lexit); ++} + -+ void pass_stack(intptr_t value) { -+ *_to++ = value; ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register tmp1_reg, ++ Register tmp2_reg, ++ Label* L_success, ++ Label* L_failure) { ++ assert_different_registers(sub_klass, super_klass, tmp1_reg); ++ if (tmp2_reg != noreg) { ++ assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); + } ++#define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) + -+ virtual void pass_int() { -+ jint value = *(jint*)single_slot_addr(); -+ if (pass_gpr(value) < 0) { -+ pass_stack(value); -+ } -+ } ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + -+ virtual void pass_long() { -+ intptr_t value = *double_slot_addr(); -+ if (pass_gpr(value) < 0) { -+ pass_stack(value); -+ } -+ } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); + -+ virtual void pass_object() { -+ intptr_t* addr = single_slot_addr(); -+ intptr_t value = *addr == 0 ? 
NULL : (intptr_t)addr; -+ if (pass_gpr(value) < 0) { -+ pass_stack(value); -+ } -+ } ++ // A couple of usefule fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); + -+ virtual void pass_float() { -+ jint value = *(jint*) single_slot_addr(); -+ // a floating-point argument is passed according to the integer calling -+ // convention if no floating-point argument register available -+ if (pass_fpr(value) < 0 && pass_gpr(value) < 0) { -+ pass_stack(value); -+ } ++ BLOCK_COMMENT("check_klass_subtype_slow_path"); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connecitons with the input regs. ++ ++ assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) ++ assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) ++ ++ RegSet pushed_registers; ++ if (!IS_A_TEMP(x12)) { ++ pushed_registers += x12; ++ } ++ if (!IS_A_TEMP(x15)) { ++ pushed_registers += x15; + } + -+ virtual void pass_double() { -+ intptr_t value = *double_slot_addr(); -+ int arg = pass_fpr(value); -+ if (0 <= arg) { -+ *_fp_identifiers |= (1ull << arg); // mark as double -+ } else if (pass_gpr(value) < 0) { // no need to mark if passing by integer registers or stack -+ pass_stack(value); ++ if (super_klass != x10 || UseCompressedOops) { ++ if (!IS_A_TEMP(x10)) { ++ pushed_registers += x10; + } + } + -+ public: -+ SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) -+ : NativeSignatureIterator(method) -+ { -+ _from = from; -+ _to = to; ++ push_reg(pushed_registers, sp); + -+ _int_args = to - (method->is_static() ? 16 : 17); -+ _fp_args = to - 8; -+ _fp_identifiers = to - 9; -+ *(int*) _fp_identifiers = 0; -+ _num_reg_int_args = (method->is_static() ? 1 : 0); -+ _num_reg_fp_args = 0; -+ } ++ // Get super_klass value into x10 (even if it was in x15 or x12) ++ mv(x10, super_klass); + -+ ~SlowSignatureHandler() -+ { -+ _from = NULL; -+ _to = NULL; -+ _int_args = NULL; -+ _fp_args = NULL; -+ _fp_identifiers = NULL; -+ } -+}; ++#ifndef PRODUCT ++ mv(t1, (address)&SharedRuntime::_partial_subtype_ctr); ++ Address pst_counter_addr(t1); ++ ld(t0, pst_counter_addr); ++ add(t0, t0, 1); ++ sd(t0, pst_counter_addr); ++#endif // PRODUCT + ++ // We will consult the secondary-super array. ++ ld(x15, secondary_supers_addr); ++ // Load the array length. ++ lwu(x12, Address(x15, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ add(x15, x15, Array::base_offset_in_bytes()); + -+JRT_ENTRY(address, -+ InterpreterRuntime::slow_signature_handler(JavaThread* current, -+ Method* method, -+ intptr_t* from, -+ intptr_t* to)) -+ methodHandle m(current, (Method*)method); -+ assert(m->is_native(), "sanity check"); ++ // Set t0 to an obvious invalid value, falling through by default ++ li(t0, -1); ++ // Scan X12 words at [X15] for an occurrence of X10. 
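++  // (Illustrative note: x15 holds the start of the secondary_supers array data,
++  // x12 its length, and x10 the super_klass being sought; after the scan t0
++  // equals x10 on a hit, so the bne against the saved copy below detects a miss.)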
++  repne_scan(x15, x10, x12, t0);
+
++  // pop will restore x10, so we should use a temp register to keep its value
++  mv(t1, x10);
+
++  // Unspill the temp registers:
++  pop_reg(pushed_registers, sp);
+
++  bne(t1, t0, *L_failure);
+
++  // Success. Cache the super we found and proceed in triumph.
++  sd(super_klass, super_cache_addr);
+
++  if (L_success != &L_fallthrough) {
++    j(*L_success);
++  }
+
++#undef IS_A_TEMP
+
++  bind(L_fallthrough);
++}
+
++// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
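++// (Illustrative note: both tlab_allocate and eden_allocate below simply delegate
++// to the active BarrierSetAssembler, which emits the GC-specific fast-path
++// allocation and branches to slow_case when the fast path cannot allocate.)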
++void MacroAssembler::tlab_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Register tmp2, ++ Label& slow_case, ++ bool is_far) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); ++} + -+ public: -+ // Creation -+ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); -+ virtual ~SignatureHandlerGenerator() { -+ _masm = NULL; -+ } ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp, ++ Label& slow_case, ++ bool is_far) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp, slow_case, is_far); ++} + -+ // Code generation -+ void generate(uint64_t fingerprint); + -+ // Code generation support -+ static Register from(); -+ static Register to(); -+ static Register temp(); -+}; ++// get_thread() can be called anywhere inside generated code so we ++// need to save whatever non-callee save context might get clobbered ++// by the call to Thread::current() or, indeed, the call setup code. ++void MacroAssembler::get_thread(Register thread) { ++ // save all call-clobbered regs except thread ++ RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + ++ RegSet::range(x28, x31) + ra - thread; ++ push_reg(saved_regs, sp); + -+#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -new file mode 100644 -index 00000000000..9a6084afa1d ---- /dev/null -+++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -@@ -0,0 +1,86 @@ -+/* -+ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ int32_t offset = 0; ++ movptr_with_offset(ra, CAST_FROM_FN_PTR(address, Thread::current), offset); ++ jalr(ra, ra, offset); ++ if (thread != x10) { ++ mv(thread, x10); ++ } + -+#ifndef CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP -+#define CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP ++ // restore pushed registers ++ pop_reg(saved_regs, sp); ++} + -+private: ++void MacroAssembler::load_byte_map_base(Register reg) { ++ jbyte *byte_map_base = ++ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); ++ li(reg, (uint64_t)byte_map_base); ++} + -+ // FP value associated with _last_Java_sp: -+ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) { ++ relocInfo::relocType rtype = dest.rspec().reloc()->type(); ++ unsigned long low_address = (uintptr_t)CodeCache::low_bound(); ++ unsigned long high_address = (uintptr_t)CodeCache::high_bound(); ++ unsigned long dest_address = (uintptr_t)dest.target(); ++ long offset_low = dest_address - low_address; ++ long offset_high = dest_address - high_address; + -+public: -+ // Each arch must define reset, save, restore -+ // These are used by objects that only care about: -+ // 1 - initializing a new state (thread creation, javaCalls) -+ // 2 - saving a current state (javaCalls) -+ // 3 - restoring an old state (javaCalls) ++ assert(is_valid_riscv64_address(dest.target()), "bad address"); ++ assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); + -+ void clear(void) { -+ // clearing _last_Java_sp must be first -+ _last_Java_sp = NULL; -+ OrderAccess::release(); -+ _last_Java_fp = NULL; -+ _last_Java_pc = NULL; ++ InstructionMark im(this); ++ code_section()->relocate(inst_mark(), dest.rspec()); ++ // RISC-V doesn't compute a page-aligned address, in order to partially ++ // compensate for the use of *signed* offsets in its base+disp12 ++ // addressing mode (RISC-V's PC-relative reach remains asymmetric ++ // [-(2G + 2K), 2G - 2k). 
++ if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { ++ int64_t distance = dest.target() - pc(); ++ auipc(reg1, (int32_t)distance + 0x800); ++ offset = ((int32_t)distance << 20) >> 20; ++ } else { ++ movptr_with_offset(reg1, dest.target(), offset); + } ++} + -+ void copy(JavaFrameAnchor* src) { -+ // In order to make sure the transition state is valid for "this" -+ // We must clear _last_Java_sp before copying the rest of the new data -+ // -+ // Hack Alert: Temporary bugfix for 4717480/4721647 -+ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp -+ // unless the value is changing -+ // -+ assert(src != NULL, "Src should not be NULL."); -+ if (_last_Java_sp != src->_last_Java_sp) { -+ _last_Java_sp = NULL; -+ OrderAccess::release(); -+ } -+ _last_Java_fp = src->_last_Java_fp; -+ _last_Java_pc = src->_last_Java_pc; -+ // Must be last so profiler will always see valid frame if has_last_frame() is true -+ _last_Java_sp = src->_last_Java_sp; -+ } ++void MacroAssembler::build_frame(int framesize) { ++ assert(framesize >= 2, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ++ sub(sp, sp, framesize); ++ sd(fp, Address(sp, framesize - 2 * wordSize)); ++ sd(ra, Address(sp, framesize - wordSize)); ++ if (PreserveFramePointer) { add(fp, sp, framesize); } ++} + -+ bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; } -+ void make_walkable(JavaThread* thread); -+ void capture_last_Java_pc(void); ++void MacroAssembler::remove_frame(int framesize) { ++ assert(framesize >= 2, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ++ ld(fp, Address(sp, framesize - 2 * wordSize)); ++ ld(ra, Address(sp, framesize - wordSize)); ++ add(sp, sp, framesize); ++} + -+ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++void MacroAssembler::reserved_stack_check() { ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; + -+ const address last_Java_pc(void) { return _last_Java_pc; } ++ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); ++ bltu(sp, t0, no_reserved_zone_enabling); + -+private: ++ enter(); // RA and FP are live. ++ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset); ++ jalr(x1, t0, offset); ++ leave(); + -+ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. 
++ offset = 0;
++ la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset);
++ jalr(x0, t0, offset);
++ should_not_reach_here();
+
-+public:
++ bind(no_reserved_zone_enabling);
++}
+
-+ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); }
++void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) {
++ Label retry_load;
++ bind(retry_load);
++ // flush and load exclusive from the memory location
++ lr_w(tmp, counter_addr);
++ addw(tmp, tmp, 1);
++ // if we store+flush with no intervening write tmp will be zero
++ sc_w(tmp, tmp, counter_addr);
++ bnez(tmp, retry_load);
++}
+
-+ intptr_t* last_Java_fp(void) { return _last_Java_fp; }
++void MacroAssembler::load_prototype_header(Register dst, Register src) {
++ load_klass(dst, src);
++ ld(dst, Address(dst, Klass::prototype_header_offset()));
++}
+
-+#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP
-diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp
-new file mode 100644
-index 00000000000..814ed23e471
---- /dev/null
-+++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp
-@@ -0,0 +1,214 @@
-+/*
-+ * Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved.
-+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
-+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This code is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License version 2 only, as
-+ * published by the Free Software Foundation.
-+ *
-+ * This code is distributed in the hope that it will be useful, but WITHOUT
-+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * version 2 for more details (a copy is included in the LICENSE file that
-+ * accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License version
-+ * 2 along with this work; if not, write to the Free Software Foundation,
-+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-+ *
-+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-+ * or visit www.oracle.com if you need additional information or have any
-+ * questions.
-+ *
-+ */
-+
-+#include "precompiled.hpp"
-+#include "asm/macroAssembler.hpp"
-+#include "gc/shared/barrierSet.hpp"
-+#include "gc/shared/barrierSetAssembler.hpp"
-+#include "memory/resourceArea.hpp"
-+#include "prims/jniFastGetField.hpp"
-+#include "prims/jvm_misc.hpp"
-+#include "prims/jvmtiExport.hpp"
-+#include "runtime/safepoint.hpp"
-+
-+#define __ masm->
-+
-+#define BUFFER_SIZE 30*wordSize
-+
-+// Instead of issuing a LoadLoad barrier we create an address
-+// dependency between loads; this might be more efficient.
++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters, ++ Register flag) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ assert_different_registers(lock_reg, obj_reg, swap_reg); + -+// Common register usage: -+// x10/f10: result -+// c_rarg0: jni env -+// c_rarg1: obj -+// c_rarg2: jfield id ++ if (PrintBiasedLockingStatistics && counters == NULL) ++ counters = BiasedLocking::counters(); + -+static const Register robj = x13; -+static const Register rcounter = x14; -+static const Register roffset = x15; -+static const Register rcounter_addr = x16; -+static const Register result = x17; ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + -+address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { -+ const char *name; -+ switch (type) { -+ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; -+ case T_BYTE: name = "jni_fast_GetByteField"; break; -+ case T_CHAR: name = "jni_fast_GetCharField"; break; -+ case T_SHORT: name = "jni_fast_GetShortField"; break; -+ case T_INT: name = "jni_fast_GetIntField"; break; -+ case T_LONG: name = "jni_fast_GetLongField"; break; -+ case T_FLOAT: name = "jni_fast_GetFloatField"; break; -+ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; -+ default: ShouldNotReachHere(); -+ name = NULL; // unreachable ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld(swap_reg, mark_addr); + } -+ ResourceMark rm; -+ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); -+ CodeBuffer cbuf(blob); -+ MacroAssembler* masm = new MacroAssembler(&cbuf); -+ address fast_entry = __ pc(); -+ -+ Label slow; -+ int32_t offset = 0; -+ __ la_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset); -+ __ addi(rcounter_addr, rcounter_addr, offset); -+ -+ Address safepoint_counter_addr(rcounter_addr, 0); -+ __ lwu(rcounter, safepoint_counter_addr); -+ // An even value means there are no ongoing safepoint operations -+ __ andi(t0, rcounter, 1); -+ __ bnez(t0, slow); -+ -+ if (JvmtiExport::can_post_field_access()) { -+ // Using barrier to order wrt. JVMTI check and load of result. -+ __ membar(MacroAssembler::LoadLoad); -+ -+ // Check to see if a field access watch has been set before we -+ // take the fast path. -+ int32_t offset2; -+ __ la_patchable(result, -+ ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), -+ offset2); -+ __ lwu(result, Address(result, offset2)); -+ __ bnez(result, slow); -+ -+ __ mv(robj, c_rarg1); ++ andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); ++ li(t0, markOopDesc::biased_lock_pattern); ++ bne(t0, tmp_reg, cas_label); ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. 
++ load_prototype_header(tmp_reg, obj_reg); ++ orr(tmp_reg, tmp_reg, xthread); ++ xorr(tmp_reg, swap_reg, tmp_reg); ++ andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); ++ if (flag->is_valid()) { ++ mv(flag, tmp_reg); ++ } ++ if (counters != NULL) { ++ Label around; ++ bnez(tmp_reg, around); ++ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); ++ j(done); ++ bind(around); + } else { -+ // Using address dependency to order wrt. load of result. -+ __ xorr(robj, c_rarg1, rcounter); -+ __ xorr(robj, robj, rcounter); // obj, since -+ // robj ^ rcounter ^ rcounter == robj -+ // robj is address dependent on rcounter. ++ beqz(tmp_reg, done); + } + -+ // Both robj and t0 are clobbered by try_resolve_jobject_in_native. -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ assert_cond(bs != NULL); -+ bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow); ++ Label try_revoke_bias; ++ Label try_rebias; + -+ __ srli(roffset, c_rarg2, 2); // offset ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. + -+ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); -+ speculative_load_pclist[count] = __ pc(); // Used by the segfault handler -+ __ add(roffset, robj, roffset); ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ bnez(t0, try_revoke_bias); + -+ switch (type) { -+ case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break; -+ case T_BYTE: __ lb(result, Address(roffset, 0)); break; -+ case T_CHAR: __ lhu(result, Address(roffset, 0)); break; -+ case T_SHORT: __ lh(result, Address(roffset, 0)); break; -+ case T_INT: __ lw(result, Address(roffset, 0)); break; -+ case T_LONG: __ ld(result, Address(roffset, 0)); break; -+ case T_FLOAT: { -+ __ flw(f28, Address(roffset, 0)); // f28 as temporaries -+ __ fmv_x_w(result, f28); // f{31--0}-->x -+ break; ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); ++ bnez(t0, try_rebias); ++ ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. 
++ { ++ Label cas_success; ++ Label counter; ++ mv(t0, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, t0); ++ orr(tmp_reg, swap_reg, xthread); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); ++ // cas failed here if slow_cass == NULL ++ if (flag->is_valid()) { ++ mv(flag, 1); ++ j(counter); + } -+ case T_DOUBLE: { -+ __ fld(f28, Address(roffset, 0)); // f28 as temporaries -+ __ fmv_x_d(result, f28); // d{63--0}-->x -+ break; ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. ++ bind(cas_success); ++ if (flag->is_valid()) { ++ mv(flag, 0); ++ bind(counter); ++ } ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), ++ tmp_reg, t0); + } -+ default: ShouldNotReachHere(); + } ++ j(done); + -+ // Using acquire: Order JVMTI check and load of result wrt. succeeding check -+ // (LoadStore for volatile field). -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ -+ __ lw(t0, safepoint_counter_addr); -+ __ bne(rcounter, t0, slow); -+ -+ switch (type) { -+ case T_FLOAT: __ fmv_w_x(f10, result); break; -+ case T_DOUBLE: __ fmv_d_x(f10, result); break; -+ default: __ mv(x10, result); break; -+ } -+ __ ret(); ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ { ++ Label cas_success; ++ Label counter; ++ load_prototype_header(tmp_reg, obj_reg); ++ orr(tmp_reg, xthread, tmp_reg); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); ++ // cas failed here if slow_cass == NULL ++ if (flag->is_valid()) { ++ mv(flag, 1); ++ j(counter); ++ } + -+ slowcase_entry_pclist[count++] = __ pc(); -+ __ bind(slow); -+ address slow_case_addr; -+ switch (type) { -+ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; -+ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; -+ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; -+ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; -+ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; -+ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; -+ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; -+ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; -+ default: ShouldNotReachHere(); -+ slow_case_addr = NULL; // unreachable ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. 
++ bind(cas_success); ++ if (flag->is_valid()) { ++ mv(flag, 0); ++ bind(counter); ++ } ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), ++ tmp_reg, t0); ++ } + } ++ j(done); + ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. + { -+ __ enter(); -+ int32_t tmp_offset = 0; -+ __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset); -+ __ jalr(x1, t0, tmp_offset); -+ __ leave(); -+ __ ret(); -+ } -+ __ flush(); -+ -+ return fast_entry; -+} ++ Label cas_success, nope; ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); ++ bind(cas_success); + ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, ++ t0); ++ } ++ bind(nope); ++ } + -+address JNI_FastGetField::generate_fast_get_boolean_field() { -+ return generate_fast_get_int_field0(T_BOOLEAN); -+} ++ bind(cas_label); + -+address JNI_FastGetField::generate_fast_get_byte_field() { -+ return generate_fast_get_int_field0(T_BYTE); ++ return null_check_offset; +} + -+address JNI_FastGetField::generate_fast_get_char_field() { -+ return generate_fast_get_int_field0(T_CHAR); -+} ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { ++ assert(UseBiasedLocking, "why call this otherwise?"); + -+address JNI_FastGetField::generate_fast_get_short_field() { -+ return generate_fast_get_int_field0(T_SHORT); ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); ++ if (flag->is_valid()) { mv(flag, tmp_reg); } ++ beqz(tmp_reg, done); +} + -+address JNI_FastGetField::generate_fast_get_int_field() { -+ return generate_fast_get_int_field0(T_INT); ++// Move the address of the polling page into dest. 
++void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(dest, Address(xthread, Thread::polling_page_offset())); ++ } else { ++ uint64_t align = (uint64_t)page & 0xfff; ++ assert(align == 0, "polling page must be page aligned"); ++ la_patchable(dest, Address(page, rtype), offset); ++ } +} + -+address JNI_FastGetField::generate_fast_get_long_field() { -+ return generate_fast_get_int_field0(T_LONG); ++// Read the polling page. The address of the polling page must ++// already be in r. ++void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { ++ int32_t offset = 0; ++ get_polling_page(dest, page, offset, rtype); ++ read_polling_page(dest, offset, rtype); +} + -+address JNI_FastGetField::generate_fast_get_float_field() { -+ return generate_fast_get_int_field0(T_FLOAT); ++// Read the polling page. The address of the polling page must ++// already be in r. ++void MacroAssembler::read_polling_page(Register dest, int32_t offset, relocInfo::relocType rtype) { ++ code_section()->relocate(pc(), rtype); ++ lwu(zr, Address(dest, offset)); +} + -+address JNI_FastGetField::generate_fast_get_double_field() { -+ return generate_fast_get_int_field0(T_DOUBLE); ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++#ifdef ASSERT ++ { ++ ThreadInVMfromUnknown tiv; ++ assert (UseCompressedOops, "should only be used for compressed oops"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); ++ } ++#endif ++ int oop_index = oop_recorder()->find_index(obj); ++ InstructionMark im(this); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ code_section()->relocate(inst_mark(), rspec); ++ li32(dst, 0xDEADBEEF); ++ zero_extend(dst, dst, 32); +} -diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -new file mode 100644 -index 00000000000..83ffcc55d83 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -@@ -0,0 +1,106 @@ -+/* -+ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#ifndef CPU_RISCV_JNITYPES_RISCV_HPP -+#define CPU_RISCV_JNITYPES_RISCV_HPP -+ -+#include "jni.h" -+#include "memory/allStatic.hpp" -+#include "oops/oop.hpp" + -+// This file holds platform-dependent routines used to write primitive jni -+// types to the array of arguments passed into JavaCalls::call ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ int index = oop_recorder()->find_index(k); ++ assert(!Universe::heap()->is_in_reserved(k), "should not be an oop"); + -+class JNITypes : private AllStatic { -+ // These functions write a java primitive type (in native format) -+ // to a java stack slot array to be passed as an argument to JavaCalls:calls. -+ // I.e., they are functionally 'push' operations if they have a 'pos' -+ // formal parameter. Note that jlong's and jdouble's are written -+ // _in reverse_ of the order in which they appear in the interpreter -+ // stack. This is because call stubs (see stubGenerator_sparc.cpp) -+ // reverse the argument list constructed by JavaCallArguments (see -+ // javaCalls.hpp). ++ InstructionMark im(this); ++ RelocationHolder rspec = metadata_Relocation::spec(index); ++ code_section()->relocate(inst_mark(), rspec); ++ narrowKlass nk = Klass::encode_klass(k); ++ li32(dst, nk); ++ zero_extend(dst, dst, 32); ++} + -+public: -+ // Ints are stored in native format in one JavaCallArgument slot at *to. -+ static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } -+ static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } -+ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type || ++ entry.rspec().type() == relocInfo::opt_virtual_call_type || ++ entry.rspec().type() == relocInfo::static_call_type || ++ entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); + -+ // Longs are stored in native format in one JavaCallArgument slot at -+ // *(to+1). -+ static inline void put_long(jlong from, intptr_t *to) { -+ *(jlong*) (to + 1) = from; ++ // We need a trampoline if branches are far. ++ if (far_branches()) { ++ bool in_scratch_emit_size = false; ++#ifdef COMPILER2 ++ // We don't want to emit a trampoline if C2 is generating dummy ++ // code during its branch shortening phase. 
++ CompileTask* task = ciEnv::current()->task();
++ in_scratch_emit_size =
++ (task != NULL && is_c2_compile(task->comp_level()) &&
++ Compile::current()->in_scratch_emit_size());
++#endif
++ if (!in_scratch_emit_size) {
++ address stub = emit_trampoline_stub(offset(), entry.target());
++ if (stub == NULL) {
++ postcond(pc() == badAddress);
++ return NULL; // CodeCache is full
++ }
++ }
++ }
+
-+ static inline void put_long(jlong from, intptr_t *to, int& pos) {
-+ *(jlong*) (to + 1 + pos) = from;
-+ pos += 2;
++ if (cbuf != NULL) { cbuf->set_insts_mark(); }
++ relocate(entry.rspec());
++ if (!far_branches()) {
++ jal(entry.target());
++ } else {
++ jal(pc());
+ }
++ // just need to return a non-null address
++ postcond(pc() != badAddress);
++ return pc();
++}
+
-+ static inline void put_long(jlong *from, intptr_t *to, int& pos) {
-+ *(jlong*) (to + 1 + pos) = *from;
-+ pos += 2;
++address MacroAssembler::ic_call(address entry, jint method_index) {
++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
++ movptr(t1, (address)Universe::non_oop_word());
++ assert_cond(entry != NULL);
++ return trampoline_call(Address(entry, rh));
++}
++
++// Emit a trampoline stub for a call to a target which is too far away.
++//
++// code sequences:
++//
++// call-site:
++// branch-and-link to <destination> or <trampoline stub>
++//
++// Related trampoline stub for this call site in the stub section:
++// load the call target from the constant pool
++// branch (RA still points to the call site above)
++
++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
++ address dest) {
++ address stub = start_a_stub(NativeInstruction::instruction_size
++ + NativeCallTrampolineStub::instruction_size);
++ if (stub == NULL) {
++ return NULL; // CodeBuffer::expand failed
+ }
+
-+ // Oops are stored in native format in one JavaCallArgument slot at *to.
-+ static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); }
-+ static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; }
++ // Create a trampoline stub relocation which relates this trampoline stub
++ // with the call instruction at insts_call_instruction_offset in the
++ // instructions code-section.
+
-+ // Floats are stored in native format in one JavaCallArgument slot at *to.
-+ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; }
-+ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; }
-+ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; }
-+
-+#undef _JNI_SLOT_OFFSET
-+#define _JNI_SLOT_OFFSET 1
-+ // Doubles are stored in native word format in one JavaCallArgument
-+ // slot at *(to+1).
++ // make sure 4 byte aligned here, so that the destination address would be
++ // 8 byte aligned after 3 instructions
++ // when we reach here we may get a 2-byte alignment so need to align it
++ align(wordSize, NativeCallTrampolineStub::data_offset);
+
++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() +
++ insts_call_instruction_offset));
++ const int stub_start_offset = offset();
+
++ // Now, create the trampoline stub's code:
++ // - load the call
++ // - call
++ Label target;
++ ld(t0, target); // auipc + ld
++ jr(t0); // jalr
++ bind(target);
++ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
++ "should be");
++ assert(offset() % wordSize == 0, "bad alignment");
++ emit_int64((intptr_t)dest);
+
-+ static inline void put_double(jdouble from, intptr_t *to) { -+ *(jdouble*) (to + 1) = from; -+ } -+ -+ static inline void put_double(jdouble from, intptr_t *to, int& pos) { -+ *(jdouble*) (to + 1 + pos) = from; -+ pos += 2; -+ } ++ // make sure 4 byte aligned here, so that the destination address would be ++ // 8 byte aligned after 3 intructions ++ // when we reach here we may get a 2-byte alignment so need to align it ++ align(wordSize, NativeCallTrampolineStub::data_offset); + -+ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { -+ *(jdouble*) (to + 1 + pos) = *from; -+ pos += 2; -+ } ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + ++ insts_call_instruction_offset)); ++ const int stub_start_offset = offset(); + -+ // The get_xxx routines, on the other hand, actually _do_ fetch -+ // java primitive types from the interpreter stack. -+ // No need to worry about alignment on Intel. -+ static inline jint get_int (intptr_t *from) { return *(jint *) from; } -+ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } -+ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } -+ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } -+ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } -+#undef _JNI_SLOT_OFFSET -+}; ++ // Now, create the trampoline stub's code: ++ // - load the call ++ // - call ++ Label target; ++ ld(t0, target); // auipc + ld ++ jr(t0); // jalr ++ bind(target); ++ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, ++ "should be"); ++ assert(offset() % wordSize == 0, "bad alignment"); ++ emit_int64((intptr_t)dest); + -+#endif // CPU_RISCV_JNITYPES_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -new file mode 100644 -index 00000000000..86710295444 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -0,0 +1,4016 @@ -+/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ const address stub_start_addr = addr_at(stub_start_offset); + -+#include "precompiled.hpp" -+#include "asm/assembler.hpp" -+#include "asm/assembler.inline.hpp" -+#include "compiler/disassembler.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "gc/shared/cardTable.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" -+#include "interpreter/bytecodeHistogram.hpp" -+#include "interpreter/interpreter.hpp" -+#include "memory/resourceArea.hpp" -+#include "memory/universe.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/accessDecorators.hpp" -+#include "oops/compressedOops.inline.hpp" -+#include "oops/klass.inline.hpp" -+#include "oops/oop.hpp" -+#include "runtime/interfaceSupport.inline.hpp" -+#include "runtime/jniHandles.inline.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.hpp" -+#include "utilities/powerOfTwo.hpp" -+#ifdef COMPILER2 -+#include "opto/compile.hpp" -+#include "opto/node.hpp" -+#include "opto/output.hpp" -+#endif ++ assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); + -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#else -+#define BLOCK_COMMENT(str) block_comment(str) -+#endif -+#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") ++ end_a_stub(); ++ return stub_start_addr; ++} + -+static void pass_arg0(MacroAssembler* masm, Register arg) { -+ if (c_rarg0 != arg) { -+ assert_cond(masm != NULL); -+ masm->mv(c_rarg0, arg); ++Address MacroAssembler::add_memory_helper(const Address dst) { ++ switch (dst.getMode()) { ++ case Address::base_plus_offset: ++ // This is the expected mode, although we allow all the other ++ // forms below. ++ return form_address(t1, dst.base(), dst.offset()); ++ default: ++ la(t1, dst); ++ return Address(t1); + } +} + -+static void pass_arg1(MacroAssembler* masm, Register arg) { -+ if (c_rarg1 != arg) { -+ assert_cond(masm != NULL); -+ masm->mv(c_rarg1, arg); -+ } ++void MacroAssembler::add_memory_int64(const Address dst, int64_t imm) { ++ Address adr = add_memory_helper(dst); ++ assert_different_registers(adr.base(), t0); ++ ld(t0, adr); ++ addi(t0, t0, imm); ++ sd(t0, adr); +} + -+static void pass_arg2(MacroAssembler* masm, Register arg) { -+ if (c_rarg2 != arg) { -+ assert_cond(masm != NULL); -+ masm->mv(c_rarg2, arg); -+ } ++void MacroAssembler::add_memory_int32(const Address dst, int32_t imm) { ++ Address adr = add_memory_helper(dst); ++ assert_different_registers(adr.base(), t0); ++ lwu(t0, adr); ++ addiw(t0, t0, imm); ++ sw(t0, adr); +} + -+static void pass_arg3(MacroAssembler* masm, Register arg) { -+ if (c_rarg3 != arg) { -+ assert_cond(masm != NULL); -+ masm->mv(c_rarg3, arg); -+ } ++void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { ++ assert_different_registers(src1, t0); ++ int32_t offset; ++ la_patchable(t0, src2, offset); ++ ld(t0, Address(t0, offset)); ++ beq(src1, t0, equal); +} + -+void MacroAssembler::align(int modulus, int extra_offset) { -+ CompressibleRegion cr(this); -+ while ((offset() + extra_offset) % modulus != 0) { nop(); } ++// string indexof ++// compute index by trailing zeros ++void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, ++ Register match_mask, Register result, ++ Register ch2, Register tmp, ++ bool haystack_isL) ++{ ++ int haystack_chr_shift = haystack_isL ? 
0 : 1; ++ srl(match_mask, match_mask, trailing_zeros); ++ srli(match_mask, match_mask, 1); ++ srli(tmp, trailing_zeros, LogBitsPerByte); ++ if (!haystack_isL) andi(tmp, tmp, 0xE); ++ add(haystack, haystack, tmp); ++ ld(ch2, Address(haystack)); ++ if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); ++ add(result, result, tmp); +} + -+void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { -+ call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); ++// string indexof ++// Find pattern element in src, compute match mask, ++// only the first occurrence of 0x80/0x8000 at low bits is the valid match index ++// match mask patterns and corresponding indices would be like: ++// - 0x8080808080808080 (Latin1) ++// - 7 6 5 4 3 2 1 0 (match index) ++// - 0x8000800080008000 (UTF16) ++// - 3 2 1 0 (match index) ++void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, ++ Register mask1, Register mask2) ++{ ++ xorr(src, pattern, src); ++ sub(match_mask, src, mask1); ++ orr(src, src, mask2); ++ notr(src, src); ++ andr(match_mask, match_mask, src); +} + -+// Implementation of call_VM versions ++#ifdef COMPILER2 ++// Code for BigInteger::mulAdd instrinsic ++// out = x10 ++// in = x11 ++// offset = x12 (already out.length-offset) ++// len = x13 ++// k = x14 ++// tmp = x28 ++// ++// pseudo code from java implementation: ++// long kLong = k & LONG_MASK; ++// carry = 0; ++// offset = out.length-offset - 1; ++// for (int j = len - 1; j >= 0; j--) { ++// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; ++// out[offset--] = (int)product; ++// carry = product >>> 32; ++// } ++// return (int)carry; ++void MacroAssembler::mul_add(Register out, Register in, Register offset, ++ Register len, Register k, Register tmp) { ++ Label L_tail_loop, L_unroll, L_end; ++ mv(tmp, out); ++ mv(out, zr); ++ blez(len, L_end); ++ zero_extend(k, k, 32); ++ slliw(t0, offset, LogBytesPerInt); ++ add(offset, tmp, t0); ++ slliw(t0, len, LogBytesPerInt); ++ add(in, in, t0); + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ bool check_exceptions) { -+ call_VM_helper(oop_result, entry_point, 0, check_exceptions); -+} ++ const int unroll = 8; ++ li(tmp, unroll); ++ blt(len, tmp, L_tail_loop); ++ bind(L_unroll); ++ for (int i = 0; i < unroll; i++) { ++ sub(in, in, BytesPerInt); ++ lwu(t0, Address(in, 0)); ++ mul(t1, t0, k); ++ add(t0, t1, out); ++ sub(offset, offset, BytesPerInt); ++ lwu(t1, Address(offset, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(offset, 0)); ++ srli(out, t0, 32); ++ } ++ subw(len, len, tmp); ++ bge(len, tmp, L_unroll); + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions) { -+ pass_arg1(this, arg_1); -+ call_VM_helper(oop_result, entry_point, 1, check_exceptions); -+} ++ bind(L_tail_loop); ++ blez(len, L_end); ++ sub(in, in, BytesPerInt); ++ lwu(t0, Address(in, 0)); ++ mul(t1, t0, k); ++ add(t0, t1, out); ++ sub(offset, offset, BytesPerInt); ++ lwu(t1, Address(offset, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(offset, 0)); ++ srli(out, t0, 32); ++ subw(len, len, 1); ++ j(L_tail_loop); + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ bool check_exceptions) { -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ pass_arg1(this, arg_1); -+ call_VM_helper(oop_result, 
entry_point, 2, check_exceptions); ++ bind(L_end); +} + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ Register arg_3, -+ bool check_exceptions) { -+ assert(arg_1 != c_rarg3, "smashed arg"); -+ assert(arg_2 != c_rarg3, "smashed arg"); -+ pass_arg3(this, arg_3); -+ -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); ++// add two unsigned input and output carry ++void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, carry); ++ assert_different_registers(dst, src2); ++ add(dst, src1, src2); ++ sltu(carry, dst, src2); ++} + -+ pass_arg1(this, arg_1); -+ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++// add two input with carry ++void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, carry); ++ add(dst, src1, src2); ++ add(dst, dst, carry); +} + -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments, -+ bool check_exceptions) { -+ call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++// add two unsigned input with carry and output carry ++void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, src2); ++ adc(dst, src1, src2, carry); ++ sltu(carry, dst, src2); +} + -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions) { -+ pass_arg1(this, arg_1); -+ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, ++ Register src1, Register src2, Register carry) ++{ ++ cad(dest_lo, dest_lo, src1, carry); ++ add(dest_hi, dest_hi, carry); ++ cad(dest_lo, dest_lo, src2, carry); ++ add(final_dest_hi, dest_hi, carry); +} + -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ bool check_exceptions) { ++/** ++ * Multiply 32 bit by 32 bit first loop. 
++ */ ++void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx) ++{ ++ // jlong carry, x[], y[], z[]; ++ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { ++ // long product = y[idx] * x[xstart] + carry; ++ // z[kdx] = (int)product; ++ // carry = product >>> 32; ++ // } ++ // z[xstart] = (int)carry; + -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ pass_arg1(this, arg_1); -+ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); -+} ++ Label L_first_loop, L_first_loop_exit; ++ blez(idx, L_first_loop_exit); + -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ Register arg_3, -+ bool check_exceptions) { -+ assert(arg_1 != c_rarg3, "smashed arg"); -+ assert(arg_2 != c_rarg3, "smashed arg"); -+ pass_arg3(this, arg_3); -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ pass_arg1(this, arg_1); -+ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); -+} ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ lwu(x_xstart, Address(t0, 0)); + -+// these are no-ops overridden by InterpreterMacroAssembler -+void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} -+void MacroAssembler::check_and_handle_popframe(Register java_thread) {} ++ bind(L_first_loop); ++ subw(idx, idx, 1); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ lwu(y_idx, Address(t0, 0)); ++ mul(product, x_xstart, y_idx); ++ add(product, product, carry); ++ srli(carry, product, 32); ++ subw(kdx, kdx, 1); ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(product, Address(t0, 0)); ++ bgtz(idx, L_first_loop); + -+// Calls to C land -+// -+// When entering C land, the fp, & esp of the last Java frame have to be recorded -+// in the (thread-local) JavaThread object. When leaving C land, the last Java fp -+// has to be reset to 0. This is required to allow proper stack traversal. -+void MacroAssembler::set_last_Java_frame(Register last_java_sp, -+ Register last_java_fp, -+ Register last_java_pc, -+ Register tmp) { ++ bind(L_first_loop_exit); ++} + -+ if (last_java_pc->is_valid()) { -+ sd(last_java_pc, Address(xthread, -+ JavaThread::frame_anchor_offset() + -+ JavaFrameAnchor::last_Java_pc_offset())); -+ } ++/** ++ * Multiply 64 bit by 64 bit first loop. 
++ */ ++void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx) ++{ ++ // ++ // jlong carry, x[], y[], z[]; ++ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { ++ // huge_128 product = y[idx] * x[xstart] + carry; ++ // z[kdx] = (jlong)product; ++ // carry = (jlong)(product >>> 64); ++ // } ++ // z[xstart] = carry; ++ // + -+ // determine last_java_sp register -+ if (last_java_sp == sp) { -+ mv(tmp, sp); -+ last_java_sp = tmp; -+ } else if (!last_java_sp->is_valid()) { -+ last_java_sp = esp; -+ } ++ Label L_first_loop, L_first_loop_exit; ++ Label L_one_x, L_one_y, L_multiply; + -+ sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); ++ subw(xstart, xstart, 1); ++ bltz(xstart, L_one_x); + -+ // last_java_fp is optional -+ if (last_java_fp->is_valid()) { -+ sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); -+ } -+} ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ ld(x_xstart, Address(t0, 0)); ++ ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian + -+void MacroAssembler::set_last_Java_frame(Register last_java_sp, -+ Register last_java_fp, -+ address last_java_pc, -+ Register tmp) { -+ assert(last_java_pc != NULL, "must provide a valid PC"); ++ bind(L_first_loop); ++ subw(idx, idx, 1); ++ bltz(idx, L_first_loop_exit); ++ subw(idx, idx, 1); ++ bltz(idx, L_one_y); + -+ la(tmp, last_java_pc); -+ sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(y_idx, Address(t0, 0)); ++ ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian ++ bind(L_multiply); + -+ set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); -+} ++ mulhu(t0, x_xstart, y_idx); ++ mul(product, x_xstart, y_idx); ++ cad(product, product, carry, t1); ++ adc(carry, t0, zr, t1); + -+void MacroAssembler::set_last_Java_frame(Register last_java_sp, -+ Register last_java_fp, -+ Label &L, -+ Register tmp) { -+ if (L.is_bound()) { -+ set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); -+ } else { -+ InstructionMark im(this); -+ L.add_patch_at(code(), locator()); -+ set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); -+ } -+} ++ subw(kdx, kdx, 2); ++ ror_imm(product, product, 32); // back to big-endian ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sd(product, Address(t0, 0)); + -+void MacroAssembler::reset_last_Java_frame(bool clear_fp) { -+ // we must set sp to zero to clear frame -+ sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); ++ j(L_first_loop); + -+ // must clear fp, so that compiled frames are not confused; it is -+ // possible that we need it only for debugging -+ if (clear_fp) { -+ sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); -+ } ++ bind(L_one_y); ++ lwu(y_idx, Address(y, 0)); ++ j(L_multiply); + -+ // Always clear the pc because it could have been set by make_walkable() -+ sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); ++ bind(L_one_x); ++ lwu(x_xstart, Address(x, 0)); ++ j(L_first_loop); ++ ++ bind(L_first_loop_exit); +} + -+void MacroAssembler::call_VM_base(Register oop_result, -+ Register java_thread, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments, -+ bool check_exceptions) { -+ // determine java_thread register -+ if (!java_thread->is_valid()) { -+ java_thread = xthread; -+ } -+ // 
determine last_java_sp register -+ if (!last_java_sp->is_valid()) { -+ last_java_sp = esp; -+ } ++/** ++ * Multiply 128 bit by 128 bit. Unrolled inner loop. ++ * ++ */ ++void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, ++ Register carry, Register carry2, ++ Register idx, Register jdx, ++ Register yz_idx1, Register yz_idx2, ++ Register tmp, Register tmp3, Register tmp4, ++ Register tmp6, Register product_hi) ++{ ++ // jlong carry, x[], y[], z[]; ++ // int kdx = xstart+1; ++ // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop ++ // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; ++ // jlong carry2 = (jlong)(tmp3 >>> 64); ++ // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; ++ // carry = (jlong)(tmp4 >>> 64); ++ // z[kdx+idx+1] = (jlong)tmp3; ++ // z[kdx+idx] = (jlong)tmp4; ++ // } ++ // idx += 2; ++ // if (idx > 0) { ++ // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; ++ // z[kdx+idx] = (jlong)yz_idx1; ++ // carry = (jlong)(yz_idx1 >>> 64); ++ // } ++ // + -+ // debugging support -+ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); -+ assert(java_thread == xthread, "unexpected register"); ++ Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; + -+ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); -+ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ srliw(jdx, idx, 2); + -+ // push java thread (becomes first argument of C function) -+ mv(c_rarg0, java_thread); ++ bind(L_third_loop); + -+ // set last Java frame before call -+ assert(last_java_sp != fp, "can't use fp"); ++ subw(jdx, jdx, 1); ++ bltz(jdx, L_third_loop_exit); ++ subw(idx, idx, 4); + -+ Label l; -+ set_last_Java_frame(last_java_sp, fp, l, t0); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(yz_idx2, Address(t0, 0)); ++ ld(yz_idx1, Address(t0, wordSize)); + -+ // do the call, remove parameters -+ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); ++ shadd(tmp6, idx, z, t0, LogBytesPerInt); + -+ // reset last Java frame -+ // Only interpreter should have to clear fp -+ reset_last_Java_frame(true); ++ ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian ++ ror_imm(yz_idx2, yz_idx2, 32); + -+ // C++ interp handles this in the interpreter -+ check_and_handle_popframe(java_thread); -+ check_and_handle_earlyret(java_thread); ++ ld(t1, Address(tmp6, 0)); ++ ld(t0, Address(tmp6, wordSize)); + -+ if (check_exceptions) { -+ // check for pending exceptions (java_thread is set upon return) -+ ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); -+ Label ok; -+ beqz(t0, ok); -+ int32_t offset = 0; -+ la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset); -+ jalr(x0, t0, offset); -+ bind(ok); -+ } ++ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 ++ mulhu(tmp4, product_hi, yz_idx1); + -+ // get oop result if there is one and reset the value in the thread -+ if (oop_result->is_valid()) { -+ get_vm_result(oop_result, java_thread); -+ } -+} ++ ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian ++ ror_imm(t1, t1, 32, tmp); + -+void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { -+ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); -+ sd(zr, Address(java_thread, JavaThread::vm_result_offset())); -+ verify_oop(oop_result, "broken oop in call_VM_base"); -+} ++ mul(tmp, 
product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp ++ mulhu(carry2, product_hi, yz_idx2); + -+void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { -+ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); -+ sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); -+} ++ cad(tmp3, tmp3, carry, carry); ++ adc(tmp4, tmp4, zr, carry); ++ cad(tmp3, tmp3, t0, t0); ++ cadc(tmp4, tmp4, tmp, t0); ++ adc(carry, carry2, zr, t0); ++ cad(tmp4, tmp4, t1, carry2); ++ adc(carry, carry, zr, carry2); + -+void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { -+ assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); -+ assert_different_registers(klass, xthread, tmp); ++ ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian ++ ror_imm(tmp4, tmp4, 32); ++ sd(tmp4, Address(tmp6, 0)); ++ sd(tmp3, Address(tmp6, wordSize)); + -+ Label L_fallthrough, L_tmp; -+ if (L_fast_path == NULL) { -+ L_fast_path = &L_fallthrough; -+ } else if (L_slow_path == NULL) { -+ L_slow_path = &L_fallthrough; -+ } ++ j(L_third_loop); + -+ // Fast path check: class is fully initialized -+ lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); -+ sub(tmp, tmp, InstanceKlass::fully_initialized); -+ beqz(tmp, *L_fast_path); ++ bind(L_third_loop_exit); + -+ // Fast path check: current thread is initializer thread -+ ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); ++ andi(idx, idx, 0x3); ++ beqz(idx, L_post_third_loop_done); + -+ if (L_slow_path == &L_fallthrough) { -+ beq(xthread, tmp, *L_fast_path); -+ bind(*L_slow_path); -+ } else if (L_fast_path == &L_fallthrough) { -+ bne(xthread, tmp, *L_slow_path); -+ bind(*L_fast_path); -+ } else { -+ Unimplemented(); -+ } -+} ++ Label L_check_1; ++ subw(idx, idx, 2); ++ bltz(idx, L_check_1); + -+void MacroAssembler::verify_oop(Register reg, const char* s) { -+ if (!VerifyOops) { return; } ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(yz_idx1, Address(t0, 0)); ++ ror_imm(yz_idx1, yz_idx1, 32); + -+ // Pass register number to verify_oop_subroutine -+ const char* b = NULL; -+ { -+ ResourceMark rm; -+ stringStream ss; -+ ss.print("verify_oop: %s: %s", reg->name(), s); -+ b = code_string(ss.as_string()); -+ } -+ BLOCK_COMMENT("verify_oop {"); ++ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 ++ mulhu(tmp4, product_hi, yz_idx1); + -+ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ ld(yz_idx2, Address(t0, 0)); ++ ror_imm(yz_idx2, yz_idx2, 32, tmp); + -+ mv(c_rarg0, reg); // c_rarg0 : x10 -+ li(t0, (uintptr_t)(address)b); ++ add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); + -+ // call indirectly to solve generation ordering problem -+ int32_t offset = 0; -+ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); -+ ld(t1, Address(t1, offset)); -+ jalr(t1); ++ ror_imm(tmp3, tmp3, 32, tmp); ++ sd(tmp3, Address(t0, 0)); + -+ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ bind(L_check_1); + -+ BLOCK_COMMENT("} verify_oop"); -+} ++ andi(idx, idx, 0x1); ++ subw(idx, idx, 1); ++ bltz(idx, L_post_third_loop_done); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ lwu(tmp4, Address(t0, 0)); ++ mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 ++ mulhu(carry2, tmp4, product_hi); + -+void MacroAssembler::verify_oop_addr(Address addr, const char* s) { -+ if (!VerifyOops) { -+ return; -+ } ++ shadd(t0, idx, z, t0, 
LogBytesPerInt); ++ lwu(tmp4, Address(t0, 0)); + -+ const char* b = NULL; -+ { -+ ResourceMark rm; -+ stringStream ss; -+ ss.print("verify_oop_addr: %s", s); -+ b = code_string(ss.as_string()); -+ } -+ BLOCK_COMMENT("verify_oop_addr {"); ++ add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); + -+ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ sw(tmp3, Address(t0, 0)); + -+ if (addr.uses(sp)) { -+ la(x10, addr); -+ ld(x10, Address(x10, 4 * wordSize)); -+ } else { -+ ld(x10, addr); -+ } ++ slli(t0, carry2, 32); ++ srli(carry, tmp3, 32); ++ orr(carry, carry, t0); + -+ li(t0, (uintptr_t)(address)b); ++ bind(L_post_third_loop_done); ++} + -+ // call indirectly to solve generation ordering problem -+ int32_t offset = 0; -+ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); -+ ld(t1, Address(t1, offset)); -+ jalr(t1); ++/** ++ * Code for BigInteger::multiplyToLen() intrinsic. ++ * ++ * x10: x ++ * x11: xlen ++ * x12: y ++ * x13: ylen ++ * x14: z ++ * x15: zlen ++ * x16: tmp1 ++ * x17: tmp2 ++ * x7: tmp3 ++ * x28: tmp4 ++ * x29: tmp5 ++ * x30: tmp6 ++ * x31: tmp7 ++ */ ++void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, ++ Register z, Register zlen, ++ Register tmp1, Register tmp2, Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, Register product_hi) ++{ ++ assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); + -+ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ const Register idx = tmp1; ++ const Register kdx = tmp2; ++ const Register xstart = tmp3; + -+ BLOCK_COMMENT("} verify_oop_addr"); -+} ++ const Register y_idx = tmp4; ++ const Register carry = tmp5; ++ const Register product = xlen; ++ const Register x_xstart = zlen; // reuse register + -+Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, -+ int extra_slot_offset) { -+ // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
-+ int stackElementSize = Interpreter::stackElementSize; -+ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); -+#ifdef ASSERT -+ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); -+ assert(offset1 - offset == stackElementSize, "correct arithmetic"); -+#endif -+ if (arg_slot.is_constant()) { -+ return Address(esp, arg_slot.as_constant() * stackElementSize + offset); -+ } else { -+ assert_different_registers(t0, arg_slot.as_register()); -+ shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize)); -+ return Address(t0, offset); -+ } -+} ++ mv(idx, ylen); // idx = ylen; ++ mv(kdx, zlen); // kdx = xlen+ylen; ++ mv(carry, zr); // carry = 0; + -+#ifndef PRODUCT -+extern "C" void findpc(intptr_t x); -+#endif ++ Label L_multiply_64_x_64_loop, L_done; + -+void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) -+{ -+ // In order to get locks to work, we need to fake a in_VM state -+ if (ShowMessageBoxOnError) { -+ JavaThread* thread = JavaThread::current(); -+ JavaThreadState saved_state = thread->thread_state(); -+ thread->set_thread_state(_thread_in_vm); -+#ifndef PRODUCT -+ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { -+ ttyLocker ttyl; -+ BytecodeCounter::print(); -+ } -+#endif -+ if (os::message_box(msg, "Execution stopped, print registers?")) { -+ ttyLocker ttyl; -+ tty->print_cr(" pc = 0x%016lx", pc); -+#ifndef PRODUCT -+ tty->cr(); -+ findpc(pc); -+ tty->cr(); -+#endif -+ tty->print_cr(" x0 = 0x%016lx", regs[0]); -+ tty->print_cr(" x1 = 0x%016lx", regs[1]); -+ tty->print_cr(" x2 = 0x%016lx", regs[2]); -+ tty->print_cr(" x3 = 0x%016lx", regs[3]); -+ tty->print_cr(" x4 = 0x%016lx", regs[4]); -+ tty->print_cr(" x5 = 0x%016lx", regs[5]); -+ tty->print_cr(" x6 = 0x%016lx", regs[6]); -+ tty->print_cr(" x7 = 0x%016lx", regs[7]); -+ tty->print_cr(" x8 = 0x%016lx", regs[8]); -+ tty->print_cr(" x9 = 0x%016lx", regs[9]); -+ tty->print_cr("x10 = 0x%016lx", regs[10]); -+ tty->print_cr("x11 = 0x%016lx", regs[11]); -+ tty->print_cr("x12 = 0x%016lx", regs[12]); -+ tty->print_cr("x13 = 0x%016lx", regs[13]); -+ tty->print_cr("x14 = 0x%016lx", regs[14]); -+ tty->print_cr("x15 = 0x%016lx", regs[15]); -+ tty->print_cr("x16 = 0x%016lx", regs[16]); -+ tty->print_cr("x17 = 0x%016lx", regs[17]); -+ tty->print_cr("x18 = 0x%016lx", regs[18]); -+ tty->print_cr("x19 = 0x%016lx", regs[19]); -+ tty->print_cr("x20 = 0x%016lx", regs[20]); -+ tty->print_cr("x21 = 0x%016lx", regs[21]); -+ tty->print_cr("x22 = 0x%016lx", regs[22]); -+ tty->print_cr("x23 = 0x%016lx", regs[23]); -+ tty->print_cr("x24 = 0x%016lx", regs[24]); -+ tty->print_cr("x25 = 0x%016lx", regs[25]); -+ tty->print_cr("x26 = 0x%016lx", regs[26]); -+ tty->print_cr("x27 = 0x%016lx", regs[27]); -+ tty->print_cr("x28 = 0x%016lx", regs[28]); -+ tty->print_cr("x30 = 0x%016lx", regs[30]); -+ tty->print_cr("x31 = 0x%016lx", regs[31]); -+ BREAKPOINT; -+ } -+ } -+ fatal("DEBUG MESSAGE: %s", msg); -+} ++ subw(xstart, xlen, 1); ++ bltz(xstart, L_done); + -+void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) { -+ Label done, not_weak; -+ beqz(value, done); // Use NULL as-is. ++ const Register jdx = tmp1; + -+ // Test for jweak tag. -+ andi(t0, value, JNIHandles::weak_tag_mask); -+ beqz(t0, not_weak); ++ if (AvoidUnalignedAccesses) { ++ // Check if x and y are both 8-byte aligned. ++ orr(t0, xlen, ylen); ++ andi(t0, t0, 0x1); ++ beqz(t0, L_multiply_64_x_64_loop); + -+ // Resolve jweak. 
-+ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, -+ Address(value, -JNIHandles::weak_tag_value), tmp, thread); -+ verify_oop(value); -+ j(done); ++ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+ bind(not_weak); -+ // Resolve (untagged) jobject. -+ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); -+ verify_oop(value); -+ bind(done); -+} ++ Label L_second_loop_unaligned; ++ bind(L_second_loop_unaligned); ++ mv(carry, zr); ++ mv(jdx, ylen); ++ subw(xstart, xstart, 1); ++ bltz(xstart, L_done); ++ sub(sp, sp, 2 * wordSize); ++ sd(z, Address(sp, 0)); ++ sd(zr, Address(sp, wordSize)); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ addi(z, t0, 4); ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ lwu(product, Address(t0, 0)); ++ Label L_third_loop, L_third_loop_exit; + -+void MacroAssembler::stop(const char* msg) { -+ address ip = pc(); -+ pusha(); -+ li(c_rarg0, (uintptr_t)(address)msg); -+ li(c_rarg1, (uintptr_t)(address)ip); -+ mv(c_rarg2, sp); -+ mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); -+ jalr(c_rarg3); -+ ebreak(); -+} ++ blez(jdx, L_third_loop_exit); + -+void MacroAssembler::unimplemented(const char* what) { -+ const char* buf = NULL; -+ { -+ ResourceMark rm; -+ stringStream ss; -+ ss.print("unimplemented: %s", what); -+ buf = code_string(ss.as_string()); ++ bind(L_third_loop); ++ subw(jdx, jdx, 1); ++ shadd(t0, jdx, y, t0, LogBytesPerInt); ++ lwu(t0, Address(t0, 0)); ++ mul(t1, t0, product); ++ add(t0, t1, carry); ++ shadd(tmp6, jdx, z, t1, LogBytesPerInt); ++ lwu(t1, Address(tmp6, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(tmp6, 0)); ++ srli(carry, t0, 32); ++ bgtz(jdx, L_third_loop); ++ ++ bind(L_third_loop_exit); ++ ld(z, Address(sp, 0)); ++ addi(sp, sp, 2 * wordSize); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ ++ j(L_second_loop_unaligned); + } -+ stop(buf); -+} + -+void MacroAssembler::emit_static_call_stub() { -+ // CompiledDirectStaticCall::set_to_interpreted knows the -+ // exact layout of this stub. ++ bind(L_multiply_64_x_64_loop); ++ multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + -+ ifence(); -+ mov_metadata(xmethod, (Metadata*)NULL); ++ Label L_second_loop_aligned; ++ beqz(kdx, L_second_loop_aligned); + -+ // Jump to the entry point of the i2c stub. -+ int32_t offset = 0; -+ movptr_with_offset(t0, 0, offset); -+ jalr(x0, t0, offset); -+} ++ Label L_carry; ++ subw(kdx, kdx, 1); ++ beqz(kdx, L_carry); + -+void MacroAssembler::call_VM_leaf_base(address entry_point, -+ int number_of_arguments, -+ Label *retaddr) { -+ call_native_base(entry_point, retaddr); -+} ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ srli(carry, carry, 32); ++ subw(kdx, kdx, 1); + -+void MacroAssembler::call_native(address entry_point, Register arg_0) { -+ pass_arg0(this, arg_0); -+ call_native_base(entry_point); -+} ++ bind(L_carry); ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+void MacroAssembler::call_native_base(address entry_point, Label *retaddr) { -+ Label E, L; -+ int32_t offset = 0; -+ push_reg(0x80000040, sp); // push << t0 & xmethod >> to sp -+ movptr_with_offset(t0, entry_point, offset); -+ jalr(x1, t0, offset); -+ if (retaddr != NULL) { -+ bind(*retaddr); -+ } -+ pop_reg(0x80000040, sp); // pop << t0 & xmethod >> from sp -+} ++ // Second and third (nested) loops. 
++ // ++ // for (int i = xstart-1; i >= 0; i--) { // Second loop ++ // carry = 0; ++ // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop ++ // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + ++ // (z[k] & LONG_MASK) + carry; ++ // z[k] = (int)product; ++ // carry = product >>> 32; ++ // } ++ // z[i] = (int)carry; ++ // } ++ // ++ // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi + -+void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { -+ call_VM_leaf_base(entry_point, number_of_arguments); -+} ++ bind(L_second_loop_aligned); ++ mv(carry, zr); // carry = 0; ++ mv(jdx, ylen); // j = ystart+1 + -+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { -+ pass_arg0(this, arg_0); -+ call_VM_leaf_base(entry_point, 1); -+} ++ subw(xstart, xstart, 1); // i = xstart-1; ++ bltz(xstart, L_done); + -+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { -+ pass_arg0(this, arg_0); -+ pass_arg1(this, arg_1); -+ call_VM_leaf_base(entry_point, 2); -+} ++ sub(sp, sp, 4 * wordSize); ++ sd(z, Address(sp, 0)); + -+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, -+ Register arg_1, Register arg_2) { -+ pass_arg0(this, arg_0); -+ pass_arg1(this, arg_1); -+ pass_arg2(this, arg_2); -+ call_VM_leaf_base(entry_point, 3); -+} ++ Label L_last_x; ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ addi(z, t0, 4); ++ subw(xstart, xstart, 1); // i = xstart-1; ++ bltz(xstart, L_last_x); + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 1); -+} ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ ld(product_hi, Address(t0, 0)); ++ ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ Label L_third_loop_prologue; ++ bind(L_third_loop_prologue); + -+ assert(arg_0 != c_rarg1, "smashed arg"); -+ pass_arg1(this, arg_1); -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 2); -+} ++ sd(ylen, Address(sp, wordSize)); ++ sd(x, Address(sp, 2 * wordSize)); ++ sd(xstart, Address(sp, 3 * wordSize)); ++ multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, ++ tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); ++ ld(z, Address(sp, 0)); ++ ld(ylen, Address(sp, wordSize)); ++ ld(x, Address(sp, 2 * wordSize)); ++ ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen ++ addi(sp, sp, 4 * wordSize); + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { -+ assert(arg_0 != c_rarg2, "smashed arg"); -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ assert(arg_0 != c_rarg1, "smashed arg"); -+ pass_arg1(this, arg_1); -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 3); -+} ++ addiw(tmp3, xlen, 1); ++ shadd(t0, tmp3, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { -+ assert(arg_0 != c_rarg3, "smashed arg"); -+ assert(arg_1 != c_rarg3, "smashed arg"); -+ assert(arg_2 != c_rarg3, "smashed arg"); -+ pass_arg3(this, arg_3); -+ assert(arg_0 != c_rarg2, "smashed arg"); -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ assert(arg_0 != c_rarg1, "smashed arg"); -+ pass_arg1(this, 
arg_1); -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 4); -+} -+ -+void MacroAssembler::nop() { -+ addi(x0, x0, 0); -+} -+ -+void MacroAssembler::mv(Register Rd, Register Rs) { -+ if (Rd != Rs) { -+ addi(Rd, Rs, 0); -+ } -+} -+ -+void MacroAssembler::notr(Register Rd, Register Rs) { -+ xori(Rd, Rs, -1); -+} -+ -+void MacroAssembler::neg(Register Rd, Register Rs) { -+ sub(Rd, x0, Rs); -+} -+ -+void MacroAssembler::negw(Register Rd, Register Rs) { -+ subw(Rd, x0, Rs); -+} -+ -+void MacroAssembler::sext_w(Register Rd, Register Rs) { -+ addiw(Rd, Rs, 0); -+} -+ -+void MacroAssembler::zext_b(Register Rd, Register Rs) { -+ andi(Rd, Rs, 0xFF); -+} -+ -+void MacroAssembler::seqz(Register Rd, Register Rs) { -+ sltiu(Rd, Rs, 1); -+} ++ subw(tmp3, tmp3, 1); ++ bltz(tmp3, L_done); + -+void MacroAssembler::snez(Register Rd, Register Rs) { -+ sltu(Rd, x0, Rs); -+} ++ srli(carry, carry, 32); ++ shadd(t0, tmp3, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ j(L_second_loop_aligned); + -+void MacroAssembler::sltz(Register Rd, Register Rs) { -+ slt(Rd, Rs, x0); -+} ++ // Next infrequent code is moved outside loops. ++ bind(L_last_x); ++ lwu(product_hi, Address(x, 0)); ++ j(L_third_loop_prologue); + -+void MacroAssembler::sgtz(Register Rd, Register Rs) { -+ slt(Rd, x0, Rs); ++ bind(L_done); +} ++#endif + -+void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) { -+ if (Rd != Rs) { -+ fsgnj_s(Rd, Rs, Rs); ++// Count bits of trailing zero chars from lsb to msb until first non-zero element. ++// For LL case, one byte for one element, so shift 8 bits once, and for other case, ++// shift 16 bits once. ++void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) ++{ ++ if (UseZbb) { ++ assert_different_registers(Rd, Rs, tmp1); ++ int step = isLL ? 8 : 16; ++ ctz(Rd, Rs); ++ andi(tmp1, Rd, step - 1); ++ sub(Rd, Rd, tmp1); ++ return; + } -+} -+ -+void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjx_s(Rd, Rs, Rs); -+} ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ Label Loop; ++ int step = isLL ? 
8 : 16; ++ li(Rd, -step); ++ mv(tmp2, Rs); + -+void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjn_s(Rd, Rs, Rs); ++ bind(Loop); ++ addi(Rd, Rd, step); ++ andi(tmp1, tmp2, ((1 << step) - 1)); ++ srli(tmp2, tmp2, step); ++ beqz(tmp1, Loop); +} + -+void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) { -+ if (Rd != Rs) { -+ fsgnj_d(Rd, Rs, Rs); ++// This instruction reads adjacent 4 bytes from the lower half of source register, ++// inflate into a register, for example: ++// Rs: A7A6A5A4A3A2A1A0 ++// Rd: 00A300A200A100A0 ++void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) ++{ ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ li(tmp1, 0xFF); ++ mv(Rd, zr); ++ for (int i = 0; i <= 3; i++) ++ { ++ andr(tmp2, Rs, tmp1); ++ if (i) { ++ slli(tmp2, tmp2, i * 8); ++ } ++ orr(Rd, Rd, tmp2); ++ if (i != 3) { ++ slli(tmp1, tmp1, 8); ++ } + } +} + -+void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjx_d(Rd, Rs, Rs); -+} -+ -+void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjn_d(Rd, Rs, Rs); -+} -+ -+void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) { -+ vmnand_mm(vd, vs, vs); -+} -+ -+void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) { -+ vnsrl_wx(vd, vs, x0, vm); ++// This instruction reads adjacent 4 bytes from the upper half of source register, ++// inflate into a register, for example: ++// Rs: A7A6A5A4A3A2A1A0 ++// Rd: 00A700A600A500A4 ++void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) ++{ ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ li(tmp1, 0xFF00000000); ++ mv(Rd, zr); ++ for (int i = 0; i <= 3; i++) ++ { ++ andr(tmp2, Rs, tmp1); ++ orr(Rd, Rd, tmp2); ++ srli(Rd, Rd, 8); ++ if (i != 3) { ++ slli(tmp1, tmp1, 8); ++ } ++ } +} + -+void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { -+ vfsgnjn_vv(vd, vs, vs); -+} ++// The size of the blocks erased by the zero_blocks stub. We must ++// handle anything smaller than this ourselves in zero_words(). ++const int MacroAssembler::zero_words_block_size = 8; + -+void MacroAssembler::la(Register Rd, const address &dest) { -+ int64_t offset = dest - pc(); -+ if (is_offset_in_range(offset, 32)) { -+ auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit -+ addi(Rd, Rd, ((int64_t)offset << 52) >> 52); -+ } else { -+ movptr(Rd, dest); -+ } -+} ++// zero_words() is used by C2 ClearArray patterns. It is as small as ++// possible, handling small word counts locally and delegating ++// anything larger to the zero_blocks stub. It is expanded many times ++// in compiled code, so it is important to keep it short. + -+void MacroAssembler::la(Register Rd, const Address &adr) { -+ InstructionMark im(this); -+ code_section()->relocate(inst_mark(), adr.rspec()); -+ relocInfo::relocType rtype = adr.rspec().reloc()->type(); ++// ptr: Address of a buffer to be zeroed. ++// cnt: Count in HeapWords. ++// ++// ptr, cnt, and t0 are clobbered. 
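++//
++// Returns the pc after the emitted code, or NULL if the trampoline call to
++// the zero_blocks stub could not be emitted.
++//
++// Rough flow, assuming zero_blocks clears whole 8-word blocks and leaves the
++// low bits of cnt for the in-line tail: e.g. for cnt == 13 the stub clears
++// 8 words, then the tail below tests bits 4, 2 and 1 of cnt and stores the
++// remaining 4 + 1 words with sd(zr, ...).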
++address MacroAssembler::zero_words(Register ptr, Register cnt) ++{ ++ assert(is_power_of_2(zero_words_block_size), "adjust this"); ++ assert(ptr == x28 && cnt == x29, "mismatch in register usage"); ++ assert_different_registers(cnt, t0); + -+ switch (adr.getMode()) { -+ case Address::literal: { -+ if (rtype == relocInfo::none) { -+ li(Rd, (intptr_t)(adr.target())); -+ } else { -+ movptr(Rd, adr.target()); ++ BLOCK_COMMENT("zero_words {"); ++ mv(t0, zero_words_block_size); ++ Label around, done, done16; ++ bltu(cnt, t0, around); ++ { ++ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); ++ assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); ++ if (StubRoutines::riscv::complete()) { ++ address tpc = trampoline_call(zero_blocks); ++ if (tpc == NULL) { ++ DEBUG_ONLY(reset_labels1(around)); ++ postcond(pc() == badAddress); ++ return NULL; + } -+ break; ++ } else { ++ jal(zero_blocks); + } -+ case Address::base_plus_offset: { -+ int32_t offset = 0; -+ baseOffset(Rd, adr, offset); -+ addi(Rd, Rd, offset); -+ break; ++ } ++ bind(around); ++ for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { ++ Label l; ++ andi(t0, cnt, i); ++ beqz(t0, l); ++ for (int j = 0; j < i; j++) { ++ sd(zr, Address(ptr, 0)); ++ addi(ptr, ptr, 8); + } -+ default: -+ ShouldNotReachHere(); ++ bind(l); + } ++ { ++ Label l; ++ andi(t0, cnt, 1); ++ beqz(t0, l); ++ sd(zr, Address(ptr, 0)); ++ bind(l); ++ } ++ BLOCK_COMMENT("} zero_words"); ++ postcond(pc() != badAddress); ++ return pc(); +} + -+void MacroAssembler::la(Register Rd, Label &label) { -+ la(Rd, target(label)); -+} -+ -+#define INSN(NAME) \ -+ void MacroAssembler::NAME##z(Register Rs, const address &dest) { \ -+ NAME(Rs, zr, dest); \ -+ } \ -+ void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ -+ NAME(Rs, zr, l, is_far); \ -+ } \ ++#define SmallArraySize (18 * BytesPerLong) + -+ INSN(beq); -+ INSN(bne); -+ INSN(blt); -+ INSN(ble); -+ INSN(bge); -+ INSN(bgt); ++// base: Address of a buffer to be zeroed, 8 bytes aligned. ++// cnt: Immediate count in HeapWords. 
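++//
++// Counts up to SmallArraySize / BytesPerLong (18) words are fully unrolled;
++// larger counts first store cnt % 8 words, then loop clearing 8 words per
++// iteration. For example, cnt == 20 emits 4 leading stores followed by a
++// loop that runs twice.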
++void MacroAssembler::zero_words(Register base, u_int64_t cnt) ++{ ++ assert_different_registers(base, t0, t1); + -+#undef INSN ++ BLOCK_COMMENT("zero_words {"); + -+// Float compare branch instructions ++ if (cnt <= SmallArraySize / BytesPerLong) { ++ for (int i = 0; i < (int)cnt; i++) { ++ sd(zr, Address(base, i * wordSize)); ++ } ++ } else { ++ const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll ++ int remainder = cnt % unroll; ++ for (int i = 0; i < remainder; i++) { ++ sd(zr, Address(base, i * wordSize)); ++ } + -+#define INSN(NAME, FLOATCMP, BRANCH) \ -+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ -+ FLOATCMP##_s(t0, Rs1, Rs2); \ -+ BRANCH(t0, l, is_far); \ -+ } \ -+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ -+ FLOATCMP##_d(t0, Rs1, Rs2); \ -+ BRANCH(t0, l, is_far); \ ++ Label loop; ++ Register cnt_reg = t0; ++ Register loop_base = t1; ++ cnt = cnt - remainder; ++ li(cnt_reg, cnt); ++ add(loop_base, base, remainder * wordSize); ++ bind(loop); ++ sub(cnt_reg, cnt_reg, unroll); ++ for (int i = 0; i < unroll; i++) { ++ sd(zr, Address(loop_base, i * wordSize)); ++ } ++ add(loop_base, loop_base, unroll * wordSize); ++ bnez(cnt_reg, loop); + } + -+ INSN(beq, feq, bnez); -+ INSN(bne, feq, beqz); ++ BLOCK_COMMENT("} zero_words"); ++} + -+#undef INSN ++// base: Address of a buffer to be filled, 8 bytes aligned. ++// cnt: Count in 8-byte unit. ++// value: Value to be filled with. ++// base will point to the end of the buffer after filling. ++void MacroAssembler::fill_words(Register base, Register cnt, Register value) ++{ ++// Algorithm: ++// ++// t0 = cnt & 7 ++// cnt -= t0 ++// p += t0 ++// switch (t0): ++// switch start: ++// do while cnt ++// cnt -= 8 ++// p[-8] = value ++// case 7: ++// p[-7] = value ++// case 6: ++// p[-6] = value ++// // ... ++// case 1: ++// p[-1] = value ++// case 0: ++// p += 8 ++// do-while end ++// switch end + ++ assert_different_registers(base, cnt, value, t0, t1); + -+#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ -+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ if (is_unordered) { \ -+ /* jump if either source is NaN or condition is expected */ \ -+ FLOATCMP2##_s(t0, Rs2, Rs1); \ -+ beqz(t0, l, is_far); \ -+ } else { \ -+ /* jump if no NaN in source and condition is expected */ \ -+ FLOATCMP1##_s(t0, Rs1, Rs2); \ -+ bnez(t0, l, is_far); \ -+ } \ -+ } \ -+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ if (is_unordered) { \ -+ /* jump if either source is NaN or condition is expected */ \ -+ FLOATCMP2##_d(t0, Rs2, Rs1); \ -+ beqz(t0, l, is_far); \ -+ } else { \ -+ /* jump if no NaN in source and condition is expected */ \ -+ FLOATCMP1##_d(t0, Rs1, Rs2); \ -+ bnez(t0, l, is_far); \ -+ } \ -+ } ++ Label fini, skip, entry, loop; ++ const int unroll = 8; // Number of sd instructions we'll unroll + -+ INSN(ble, fle, flt); -+ INSN(blt, flt, fle); ++ beqz(cnt, fini); + -+#undef INSN ++ andi(t0, cnt, unroll - 1); ++ sub(cnt, cnt, t0); ++ // align 8, so first sd n % 8 = mod, next loop sd 8 * n. 
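++  // That is: base is pre-advanced by (cnt % 8) * 8 bytes and the jr below
++  // lands (cnt % 8) sd instructions before 'entry' (each sd is assumed to be
++  // emitted as a 4-byte, uncompressed instruction), so the first partial pass
++  // stores exactly the leading cnt % 8 words; every later pass stores 8.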
++ shadd(base, t0, base, t1, 3); ++ la(t1, entry); ++ slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) ++ sub(t1, t1, t0); ++ jr(t1); + -+#define INSN(NAME, CMP) \ -+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ -+ } \ -+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ ++ bind(loop); ++ add(base, base, unroll * 8); ++ for (int i = -unroll; i < 0; i++) { ++ sd(value, Address(base, i * 8)); + } ++ bind(entry); ++ sub(cnt, cnt, unroll); ++ bgez(cnt, loop); + -+ INSN(bgt, blt); -+ INSN(bge, ble); -+ -+#undef INSN -+ ++ bind(fini); ++} + -+#define INSN(NAME, CSR) \ -+ void MacroAssembler::NAME(Register Rd) { \ -+ csrr(Rd, CSR); \ -+ } ++#define FCVT_SAFE(FLOATCVT, FLOATEQ) \ ++void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ ++ Label L_Okay; \ ++ fscsr(zr); \ ++ FLOATCVT(dst, src); \ ++ frcsr(tmp); \ ++ andi(tmp, tmp, 0x1E); \ ++ beqz(tmp, L_Okay); \ ++ FLOATEQ(tmp, src, src); \ ++ bnez(tmp, L_Okay); \ ++ mv(dst, zr); \ ++ bind(L_Okay); \ ++} + -+ INSN(rdinstret, CSR_INSTERT); -+ INSN(rdcycle, CSR_CYCLE); -+ INSN(rdtime, CSR_TIME); -+ INSN(frcsr, CSR_FCSR); -+ INSN(frrm, CSR_FRM); -+ INSN(frflags, CSR_FFLAGS); ++FCVT_SAFE(fcvt_w_s, feq_s) ++FCVT_SAFE(fcvt_l_s, feq_s) ++FCVT_SAFE(fcvt_w_d, feq_d) ++FCVT_SAFE(fcvt_l_d, feq_d) + -+#undef INSN ++#undef FCVT_SAFE + -+void MacroAssembler::csrr(Register Rd, unsigned csr) { -+ csrrs(Rd, csr, x0); ++#define FCMP(FLOATTYPE, FLOATSIG) \ ++void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ ++ FloatRegister Rs2, int unordered_result) { \ ++ Label Ldone; \ ++ if (unordered_result < 0) { \ ++ /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ ++ /* installs 1 if gt else 0 */ \ ++ flt_##FLOATSIG(result, Rs2, Rs1); \ ++ /* Rs1 > Rs2, install 1 */ \ ++ bgtz(result, Ldone); \ ++ feq_##FLOATSIG(result, Rs1, Rs2); \ ++ addi(result, result, -1); \ ++ /* Rs1 = Rs2, install 0 */ \ ++ /* NaN or Rs1 < Rs2, install -1 */ \ ++ bind(Ldone); \ ++ } else { \ ++ /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. 
*/ \ ++ /* installs 1 if gt or unordered else 0 */ \ ++ flt_##FLOATSIG(result, Rs1, Rs2); \ ++ /* Rs1 < Rs2, install -1 */ \ ++ bgtz(result, Ldone); \ ++ feq_##FLOATSIG(result, Rs1, Rs2); \ ++ addi(result, result, -1); \ ++ /* Rs1 = Rs2, install 0 */ \ ++ /* NaN or Rs1 > Rs2, install 1 */ \ ++ bind(Ldone); \ ++ neg(result, result); \ ++ } \ +} + -+#define INSN(NAME, OPFUN) \ -+ void MacroAssembler::NAME(unsigned csr, Register Rs) { \ -+ OPFUN(x0, csr, Rs); \ -+ } ++FCMP(float, s); ++FCMP(double, d); + -+ INSN(csrw, csrrw); -+ INSN(csrs, csrrs); -+ INSN(csrc, csrrc); ++#undef FCMP + -+#undef INSN ++// Zero words; len is in bytes ++// Destroys all registers except addr ++// len must be a nonzero multiple of wordSize ++void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { ++ assert_different_registers(addr, len, tmp, t0, t1); + -+#define INSN(NAME, OPFUN) \ -+ void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ -+ OPFUN(x0, csr, imm); \ ++#ifdef ASSERT ++ { ++ Label L; ++ andi(t0, len, BytesPerWord - 1); ++ beqz(t0, L); ++ stop("len is not a multiple of BytesPerWord"); ++ bind(L); + } ++#endif // ASSERT + -+ INSN(csrwi, csrrwi); -+ INSN(csrsi, csrrsi); -+ INSN(csrci, csrrci); -+ -+#undef INSN -+ -+#define INSN(NAME, CSR) \ -+ void MacroAssembler::NAME(Register Rd, Register Rs) { \ -+ csrrw(Rd, CSR, Rs); \ -+ } ++#ifndef PRODUCT ++ block_comment("zero memory"); ++#endif // PRODUCT + -+ INSN(fscsr, CSR_FCSR); -+ INSN(fsrm, CSR_FRM); -+ INSN(fsflags, CSR_FFLAGS); ++ Label loop; ++ Label entry; + -+#undef INSN ++ // Algorithm: ++ // ++ // t0 = cnt & 7 ++ // cnt -= t0 ++ // p += t0 ++ // switch (t0) { ++ // do { ++ // cnt -= 8 ++ // p[-8] = 0 ++ // case 7: ++ // p[-7] = 0 ++ // case 6: ++ // p[-6] = 0 ++ // ... ++ // case 1: ++ // p[-1] = 0 ++ // case 0: ++ // p += 8 ++ // } while (cnt) ++ // } + -+#define INSN(NAME) \ -+ void MacroAssembler::NAME(Register Rs) { \ -+ NAME(x0, Rs); \ -+ } ++ const int unroll = 8; // Number of sd(zr) instructions we'll unroll + -+ INSN(fscsr); -+ INSN(fsrm); -+ INSN(fsflags); -+ -+#undef INSN -+ -+void MacroAssembler::fsrmi(Register Rd, unsigned imm) { -+ guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register"); -+ csrrwi(Rd, CSR_FRM, imm); -+} -+ -+void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { -+ csrrwi(Rd, CSR_FFLAGS, imm); ++ srli(len, len, LogBytesPerWord); ++ andi(t0, len, unroll - 1); // t0 = cnt % unroll ++ sub(len, len, t0); // cnt -= unroll ++ // tmp always points to the end of the region we're about to zero ++ shadd(tmp, t0, addr, t1, LogBytesPerWord); ++ la(t1, entry); ++ slli(t0, t0, 2); ++ sub(t1, t1, t0); ++ jr(t1); ++ bind(loop); ++ sub(len, len, unroll); ++ for (int i = -unroll; i < 0; i++) { ++ Assembler::sd(zr, Address(tmp, i * wordSize)); ++ } ++ bind(entry); ++ add(tmp, tmp, unroll * wordSize); ++ bnez(len, loop); +} + -+#define INSN(NAME) \ -+ void MacroAssembler::NAME(unsigned imm) { \ -+ NAME(x0, imm); \ ++// shift left by shamt and add ++// Rd = (Rs1 << shamt) + Rs2 ++void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { ++ if (UseZba) { ++ if (shamt == 1) { ++ sh1add(Rd, Rs1, Rs2); ++ return; ++ } else if (shamt == 2) { ++ sh2add(Rd, Rs1, Rs2); ++ return; ++ } else if (shamt == 3) { ++ sh3add(Rd, Rs1, Rs2); ++ return; ++ } + } + -+ INSN(fsrmi); -+ INSN(fsflagsi); ++ if (shamt != 0) { ++ slli(tmp, Rs1, shamt); ++ add(Rd, Rs2, tmp); ++ } else { ++ add(Rd, Rs1, Rs2); ++ } ++} + -+#undef INSN ++void MacroAssembler::zero_extend(Register dst, 
Register src, int bits) { ++ if (UseZba && bits == 32) { ++ zext_w(dst, src); ++ return; ++ } + -+void MacroAssembler::push_reg(Register Rs) -+{ -+ addi(esp, esp, 0 - wordSize); -+ sd(Rs, Address(esp, 0)); -+} ++ if (UseZbb && bits == 16) { ++ zext_h(dst, src); ++ return; ++ } + -+void MacroAssembler::pop_reg(Register Rd) -+{ -+ ld(Rd, esp, 0); -+ addi(esp, esp, wordSize); ++ if (bits == 8) { ++ zext_b(dst, src); ++ } else { ++ slli(dst, src, XLEN - bits); ++ srli(dst, dst, XLEN - bits); ++ } +} + -+int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { -+ int count = 0; -+ // Scan bitset to accumulate register pairs -+ for (int reg = 31; reg >= 0; reg--) { -+ if ((1U << 31) & bitset) { -+ regs[count++] = reg; ++void MacroAssembler::sign_extend(Register dst, Register src, int bits) { ++ if (UseZbb) { ++ if (bits == 8) { ++ sext_b(dst, src); ++ return; ++ } else if (bits == 16) { ++ sext_h(dst, src); ++ return; + } -+ bitset <<= 1; + } -+ return count; -+} -+ -+// Push lots of registers in the bit set supplied. Don't push sp. -+// Return the number of words pushed -+int MacroAssembler::push_reg(unsigned int bitset, Register stack) { -+ DEBUG_ONLY(int words_pushed = 0;) -+ CompressibleRegion cr(this); + -+ unsigned char regs[32]; -+ int count = bitset_to_regs(bitset, regs); -+ // reserve one slot to align for odd count -+ int offset = is_even(count) ? 0 : wordSize; ++ if (bits == 32) { ++ sext_w(dst, src); ++ } else { ++ slli(dst, src, XLEN - bits); ++ srai(dst, dst, XLEN - bits); ++ } ++} + -+ if (count) { -+ addi(stack, stack, - count * wordSize - offset); ++void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) ++{ ++ if (src1 == src2) { ++ mv(dst, zr); ++ return; + } -+ for (int i = count - 1; i >= 0; i--) { -+ sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); -+ DEBUG_ONLY(words_pushed ++;) ++ Label done; ++ Register left = src1; ++ Register right = src2; ++ if (dst == src1) { ++ assert_different_registers(dst, src2, tmp); ++ mv(tmp, src1); ++ left = tmp; ++ } else if (dst == src2) { ++ assert_different_registers(dst, src1, tmp); ++ mv(tmp, src2); ++ right = tmp; + } + -+ assert(words_pushed == count, "oops, pushed != count"); -+ -+ return count; ++ // installs 1 if gt else 0 ++ slt(dst, right, left); ++ bnez(dst, done); ++ slt(dst, left, right); ++ // dst = -1 if lt; else if eq , dst = 0 ++ neg(dst, dst); ++ bind(done); +} + -+int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { -+ DEBUG_ONLY(int words_popped = 0;) -+ CompressibleRegion cr(this); -+ -+ unsigned char regs[32]; -+ int count = bitset_to_regs(bitset, regs); -+ // reserve one slot to align for odd count -+ int offset = is_even(count) ? 
0 : wordSize; ++#ifdef COMPILER2 ++// short string ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL) ++{ ++ Register ch1 = t0; ++ Register index = t1; + -+ for (int i = count - 1; i >= 0; i--) { -+ ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); -+ DEBUG_ONLY(words_popped ++;) -+ } ++ BLOCK_COMMENT("string_indexof_char_short {"); + -+ if (count) { -+ addi(stack, stack, count * wordSize + offset); -+ } -+ assert(words_popped == count, "oops, popped != count"); ++ Label LOOP, LOOP1, LOOP4, LOOP8; ++ Label MATCH, MATCH1, MATCH2, MATCH3, ++ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; + -+ return count; -+} ++ mv(result, -1); ++ mv(index, zr); + -+// Push float registers in the bitset, except sp. -+// Return the number of heapwords pushed. -+int MacroAssembler::push_fp(unsigned int bitset, Register stack) { -+ CompressibleRegion cr(this); -+ int words_pushed = 0; -+ unsigned char regs[32]; -+ int count = bitset_to_regs(bitset, regs); -+ int push_slots = count + (count & 1); ++ bind(LOOP); ++ addi(t0, index, 8); ++ ble(t0, cnt1, LOOP8); ++ addi(t0, index, 4); ++ ble(t0, cnt1, LOOP4); ++ j(LOOP1); + -+ if (count) { -+ addi(stack, stack, -push_slots * wordSize); -+ } ++ bind(LOOP8); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); ++ beq(ch, ch1, MATCH4); ++ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); ++ beq(ch, ch1, MATCH5); ++ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); ++ beq(ch, ch1, MATCH6); ++ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); ++ beq(ch, ch1, MATCH7); ++ addi(index, index, 8); ++ addi(str1, str1, isL ? 8 : 16); ++ blt(index, cnt1, LOOP); ++ j(NOMATCH); + -+ for (int i = count - 1; i >= 0; i--) { -+ fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize)); -+ words_pushed++; -+ } ++ bind(LOOP4); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ addi(index, index, 4); ++ addi(str1, str1, isL ? 4 : 8); ++ bge(index, cnt1, NOMATCH); + -+ assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); -+ return count; -+} ++ bind(LOOP1); ++ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); ++ beq(ch, ch1, MATCH); ++ addi(index, index, 1); ++ addi(str1, str1, isL ? 
1 : 2); ++ blt(index, cnt1, LOOP1); ++ j(NOMATCH); + -+int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { -+ CompressibleRegion cr(this); -+ int words_popped = 0; -+ unsigned char regs[32]; -+ int count = bitset_to_regs(bitset, regs); -+ int pop_slots = count + (count & 1); ++ bind(MATCH1); ++ addi(index, index, 1); ++ j(MATCH); + -+ for (int i = count - 1; i >= 0; i--) { -+ fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize)); -+ words_popped++; -+ } ++ bind(MATCH2); ++ addi(index, index, 2); ++ j(MATCH); + -+ if (count) { -+ addi(stack, stack, pop_slots * wordSize); -+ } ++ bind(MATCH3); ++ addi(index, index, 3); ++ j(MATCH); + -+ assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count); -+ return count; -+} ++ bind(MATCH4); ++ addi(index, index, 4); ++ j(MATCH); + -+#ifdef COMPILER2 -+int MacroAssembler::push_vp(unsigned int bitset, Register stack) { -+ CompressibleRegion cr(this); -+ int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); ++ bind(MATCH5); ++ addi(index, index, 5); ++ j(MATCH); + -+ // Scan bitset to accumulate register pairs -+ unsigned char regs[32]; -+ int count = 0; -+ for (int reg = 31; reg >= 0; reg--) { -+ if ((1U << 31) & bitset) { -+ regs[count++] = reg; -+ } -+ bitset <<= 1; -+ } ++ bind(MATCH6); ++ addi(index, index, 6); ++ j(MATCH); + -+ for (int i = 0; i < count; i++) { -+ sub(stack, stack, vector_size_in_bytes); -+ vs1r_v(as_VectorRegister(regs[i]), stack); -+ } ++ bind(MATCH7); ++ addi(index, index, 7); + -+ return count * vector_size_in_bytes / wordSize; ++ bind(MATCH); ++ mv(result, index); ++ bind(NOMATCH); ++ BLOCK_COMMENT("} string_indexof_char_short"); +} + -+int MacroAssembler::pop_vp(unsigned int bitset, Register stack) { -+ CompressibleRegion cr(this); -+ int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL) ++{ ++ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; ++ Register ch1 = t0; ++ Register orig_cnt = t1; ++ Register mask1 = tmp3; ++ Register mask2 = tmp2; ++ Register match_mask = tmp1; ++ Register trailing_char = tmp4; ++ Register unaligned_elems = tmp4; + -+ // Scan bitset to accumulate register pairs -+ unsigned char regs[32]; -+ int count = 0; -+ for (int reg = 31; reg >= 0; reg--) { -+ if ((1U << 31) & bitset) { -+ regs[count++] = reg; ++ BLOCK_COMMENT("string_indexof_char {"); ++ beqz(cnt1, NOMATCH); ++ ++ addi(t0, cnt1, isL ? 
-32 : -16); ++ bgtz(t0, DO_LONG); ++ string_indexof_char_short(str1, cnt1, ch, result, isL); ++ j(DONE); ++ ++ bind(DO_LONG); ++ mv(orig_cnt, cnt1); ++ if (AvoidUnalignedAccesses) { ++ Label ALIGNED; ++ andi(unaligned_elems, str1, 0x7); ++ beqz(unaligned_elems, ALIGNED); ++ sub(unaligned_elems, unaligned_elems, 8); ++ neg(unaligned_elems, unaligned_elems); ++ if (!isL) { ++ srli(unaligned_elems, unaligned_elems, 1); + } -+ bitset <<= 1; ++ // do unaligned part per element ++ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); ++ bgez(result, DONE); ++ mv(orig_cnt, cnt1); ++ sub(cnt1, cnt1, unaligned_elems); ++ bind(ALIGNED); + } + -+ for (int i = count - 1; i >= 0; i--) { -+ vl1r_v(as_VectorRegister(regs[i]), stack); -+ add(stack, stack, vector_size_in_bytes); ++ // duplicate ch ++ if (isL) { ++ slli(ch1, ch, 8); ++ orr(ch, ch1, ch); + } ++ slli(ch1, ch, 16); ++ orr(ch, ch1, ch); ++ slli(ch1, ch, 32); ++ orr(ch, ch1, ch); + -+ return count * vector_size_in_bytes / wordSize; -+} -+#endif // COMPILER2 ++ if (!isL) { ++ slli(cnt1, cnt1, 1); ++ } + -+void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { -+ CompressibleRegion cr(this); -+ // Push integer registers x7, x10-x17, x28-x31. -+ push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ mv(mask1, isL ? mask0101 : mask0001); ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ mv(mask2, isL ? mask7f7f : mask7fff); + -+ // Push float registers f0-f7, f10-f17, f28-f31. -+ addi(sp, sp, - wordSize * 20); -+ int offset = 0; -+ for (int i = 0; i < 32; i++) { -+ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { -+ fsd(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); -+ } -+ } -+} ++ bind(CH1_LOOP); ++ ld(ch1, Address(str1)); ++ addi(str1, str1, 8); ++ addi(cnt1, cnt1, -8); ++ compute_match_mask(ch1, ch, match_mask, mask1, mask2); ++ bnez(match_mask, HIT); ++ bgtz(cnt1, CH1_LOOP); ++ j(NOMATCH); + -+void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { -+ CompressibleRegion cr(this); -+ int offset = 0; -+ for (int i = 0; i < 32; i++) { -+ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { -+ fld(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); -+ } ++ bind(HIT); ++ ctzc_bit(trailing_char, match_mask, isL, ch1, result); ++ srli(trailing_char, trailing_char, 3); ++ addi(cnt1, cnt1, 8); ++ ble(cnt1, trailing_char, NOMATCH); ++ // match case ++ if (!isL) { ++ srli(cnt1, cnt1, 1); ++ srli(trailing_char, trailing_char, 1); + } -+ addi(sp, sp, wordSize * 20); + -+ pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); -+} ++ sub(result, orig_cnt, cnt1); ++ add(result, result, trailing_char); ++ j(DONE); + -+// Push all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). -+void MacroAssembler::pusha() { -+ CompressibleRegion cr(this); -+ push_reg(0xffffffe2, sp); -+} ++ bind(NOMATCH); ++ mv(result, -1); + -+// Pop all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). 
-+void MacroAssembler::popa() { -+ CompressibleRegion cr(this); -+ pop_reg(0xffffffe2, sp); ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof_char"); +} + -+void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { -+ CompressibleRegion cr(this); -+ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) -+ push_reg(0xffffffe0, sp); ++typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); + -+ // float registers -+ addi(sp, sp, - 32 * wordSize); -+ for (int i = 0; i < 32; i++) { -+ fsd(as_FloatRegister(i), Address(sp, i * wordSize)); -+ } ++// Search for needle in haystack and return index or -1 ++// x10: result ++// x11: haystack ++// x12: haystack_len ++// x13: needle ++// x14: needle_len ++void MacroAssembler::string_indexof(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae) ++{ ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + -+ // vector registers -+ if (save_vectors) { -+ sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers); -+ vsetvli(t0, x0, Assembler::e64, Assembler::m8); -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -+ add(t0, sp, vector_size_in_bytes * i); -+ vse64_v(as_VectorRegister(i), t0); -+ } -+ } -+} ++ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; + -+void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { -+ CompressibleRegion cr(this); -+ // vector registers -+ if (restore_vectors) { -+ vsetvli(t0, x0, Assembler::e64, Assembler::m8); -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -+ vle64_v(as_VectorRegister(i), sp); -+ add(sp, sp, vector_size_in_bytes * 8); -+ } -+ } ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register nlen_tmp = tmp1; // needle len tmp ++ Register hlen_tmp = tmp2; // haystack len tmp ++ Register result_tmp = tmp4; + -+ // float registers -+ for (int i = 0; i < 32; i++) { -+ fld(as_FloatRegister(i), Address(sp, i * wordSize)); -+ } -+ addi(sp, sp, 32 * wordSize); ++ bool isLL = ae == StrIntrinsicNode::LL; + -+ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) -+ pop_reg(0xffffffe0, sp); -+} ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? 
(load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; + -+static int patch_offset_in_jal(address branch, int64_t offset) { -+ assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction!\n"); -+ Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31] -+ Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21] -+ Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20] -+ Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12] -+ return NativeInstruction::instruction_size; // only one instruction -+} ++ BLOCK_COMMENT("string_indexof {"); + -+static int patch_offset_in_conditional_branch(address branch, int64_t offset) { -+ assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne insrusction!\n"); -+ Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31] -+ Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25] -+ Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7] -+ Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] -+ return NativeInstruction::instruction_size; // only one instruction -+} ++ // Note, inline_string_indexOf() generates checks: ++ // if (pattern.count > src.count) return -1; ++ // if (pattern.count == 0) return 0; + -+static int patch_offset_in_pc_relative(address branch, int64_t offset) { -+ const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load -+ Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12] -+ Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20] -+ return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size; -+} ++ // We have two strings, a source string in haystack, haystack_len and a pattern string ++ // in needle, needle_len. Find the first occurence of pattern in source or return -1. + -+static int patch_addr_in_movptr(address branch, address target) { -+ const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load -+ int32_t lower = ((intptr_t)target << 36) >> 36; -+ int64_t upper = ((intptr_t)target - lower) >> 28; -+ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[47:28] + target[27] ==> branch[31:12] -+ Assembler::patch(branch + 4, 31, 20, (lower >> 16) & 0xfff); // Addi. target[27:16] ==> branch[31:20] -+ Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff); // Addi. target[15: 5] ==> branch[31:20] -+ Assembler::patch(branch + 20, 31, 20, lower & 0x1f); // Addi/Jalr/Load. target[ 4: 0] ==> branch[31:20] -+ return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; -+} ++ // For larger pattern and source we use a simplified Boyer Moore algorithm. ++ // With a small pattern and source we use linear scan. 
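++  // For example, a 12-character needle in a 4000-character haystack takes the
++  // Boyer-Moore-Horspool path below, while a needle shorter than 8 characters
++  // always goes through the linear scan.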
+ -+static int patch_imm_in_li64(address branch, address target) { -+ const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi -+ int64_t lower = (intptr_t)target & 0xffffffff; -+ lower = lower - ((lower << 44) >> 44); -+ int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower; -+ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; -+ int64_t tmp_upper = upper, tmp_lower = upper; -+ tmp_lower = (tmp_lower << 52) >> 52; -+ tmp_upper -= tmp_lower; -+ tmp_upper >>= 12; -+ // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:28] == 0x7ff && target[19] == 1), -+ // upper = target[63:32] + 1. -+ Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui. -+ Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi. -+ // Load the rest 32 bits. -+ Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi. -+ Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi. -+ Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi. -+ return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; -+} ++ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. ++ sub(result_tmp, haystack_len, needle_len); ++ // needle_len < 8, use linear scan ++ sub(t0, needle_len, 8); ++ bltz(t0, LINEARSEARCH); ++ // needle_len >= 256, use linear scan ++ sub(t0, needle_len, 256); ++ bgez(t0, LINEARSTUB); ++ // needle_len >= haystack_len/4, use linear scan ++ srli(t0, haystack_len, 2); ++ bge(needle_len, t0, LINEARSTUB); + -+static int patch_imm_in_li32(address branch, int32_t target) { -+ const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw -+ int64_t upper = (intptr_t)target; -+ int32_t lower = (((int32_t)target) << 20) >> 20; -+ upper -= lower; -+ upper = (int32_t)upper; -+ Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui. -+ Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw. -+ return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; -+} ++ // Boyer-Moore-Horspool introduction: ++ // The Boyer Moore alogorithm is based on the description here:- ++ // ++ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm ++ // ++ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule ++ // and the 'Good Suffix' rule. ++ // ++ // These rules are essentially heuristics for how far we can shift the ++ // pattern along the search string. ++ // ++ // The implementation here uses the 'Bad Character' rule only because of the ++ // complexity of initialisation for the 'Good Suffix' rule. 
++ // ++ // This is also known as the Boyer-Moore-Horspool algorithm: ++ // ++ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm ++ // ++ // #define ASIZE 256 ++ // ++ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { ++ // int i, j; ++ // unsigned c; ++ // unsigned char bc[ASIZE]; ++ // ++ // /* Preprocessing */ ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ // ++ // /* Searching */ ++ // j = 0; ++ // while (j <= n - m) { ++ // c = src[i+j]; ++ // if (pattern[m-1] == c) ++ // int k; ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // if (k < 0) return j; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 ++ // // LL case: (c< 256) always true. Remove branch ++ // j += bc[pattern[j+m-1]]; ++ // #endif ++ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF ++ // // UU case: need if (c if not. ++ // if (c < ASIZE) ++ // j += bc[pattern[j+m-1]]; ++ // else ++ // j += m ++ // #endif ++ // } ++ // return -1; ++ // } + -+static long get_offset_of_jal(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ long offset = 0; -+ unsigned insn = *(unsigned*)insn_addr; -+ long val = (long)Assembler::sextract(insn, 31, 12); -+ offset |= ((val >> 19) & 0x1) << 20; -+ offset |= (val & 0xff) << 12; -+ offset |= ((val >> 8) & 0x1) << 11; -+ offset |= ((val >> 9) & 0x3ff) << 1; -+ offset = (offset << 43) >> 43; -+ return offset; -+} ++ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result ++ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, ++ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; + -+static long get_offset_of_conditional_branch(address insn_addr) { -+ long offset = 0; -+ assert_cond(insn_addr != NULL); -+ unsigned insn = *(unsigned*)insn_addr; -+ offset = (long)Assembler::sextract(insn, 31, 31); -+ offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); -+ offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); -+ offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); -+ offset = (offset << 41) >> 41; -+ return offset; -+} ++ Register haystack_end = haystack_len; ++ Register skipch = tmp2; + -+static long get_offset_of_pc_relative(address insn_addr) { -+ long offset = 0; -+ assert_cond(insn_addr != NULL); -+ offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12; // Auipc. -+ offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addi/Jalr/Load. -+ offset = (offset << 32) >> 32; -+ return offset; -+} ++ // pattern length is >=8, so, we can read at least 1 register for cases when ++ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for ++ // UL case. We'll re-read last character in inner pre-loop code to have ++ // single outer pre-loop load ++ const int firstStep = isLL ? 7 : 3; + -+static address get_target_of_movptr(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16; // Addi. 
-+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. -+ return (address) target_address; -+} ++ const int ASIZE = 256; ++ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) + -+static address get_target_of_li64(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20)); // Addi. -+ return (address)target_address; -+} ++ sub(sp, sp, ASIZE); + -+static address get_target_of_li32(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addiw. -+ return (address)target_address; -+} ++ // init BC offset table with default value: needle_len ++ slli(t0, needle_len, 8); ++ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] ++ slli(tmp1, t0, 16); ++ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] ++ slli(tmp1, t0, 32); ++ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] + -+// Patch any kind of instruction; there may be several instructions. -+// Return the total length (in bytes) of the instructions. 
-+int MacroAssembler::pd_patch_instruction_size(address branch, address target) { -+ assert_cond(branch != NULL); -+ int64_t offset = target - branch; -+ if (NativeInstruction::is_jal_at(branch)) { // jal -+ return patch_offset_in_jal(branch, offset); -+ } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne -+ return patch_offset_in_conditional_branch(branch, offset); -+ } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load -+ return patch_offset_in_pc_relative(branch, offset); -+ } else if (NativeInstruction::is_movptr_at(branch)) { // movptr -+ return patch_addr_in_movptr(branch, target); -+ } else if (NativeInstruction::is_li64_at(branch)) { // li64 -+ return patch_imm_in_li64(branch, target); -+ } else if (NativeInstruction::is_li32_at(branch)) { // li32 -+ int64_t imm = (intptr_t)target; -+ return patch_imm_in_li32(branch, (int32_t)imm); -+ } else { -+#ifdef ASSERT -+ tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", -+ *(unsigned*)branch, p2i(branch)); -+ Disassembler::decode(branch - 16, branch + 16); -+#endif -+ ShouldNotReachHere(); -+ return -1; ++ mv(ch1, sp); // ch1 is t0 ++ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations ++ ++ bind(BM_INIT_LOOP); ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ for (int i = 0; i < 4; i++) { ++ sd(tmp5, Address(ch1, i * wordSize)); + } -+} ++ add(ch1, ch1, 32); ++ sub(tmp6, tmp6, 4); ++ bgtz(tmp6, BM_INIT_LOOP); + -+address MacroAssembler::target_addr_for_insn(address insn_addr) { -+ long offset = 0; -+ assert_cond(insn_addr != NULL); -+ if (NativeInstruction::is_jal_at(insn_addr)) { // jal -+ offset = get_offset_of_jal(insn_addr); -+ } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne -+ offset = get_offset_of_conditional_branch(insn_addr); -+ } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load -+ offset = get_offset_of_pc_relative(insn_addr); -+ } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr -+ return get_target_of_movptr(insn_addr); -+ } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64 -+ return get_target_of_li64(insn_addr); -+ } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32 -+ return get_target_of_li32(insn_addr); ++ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern ++ Register orig_haystack = tmp5; ++ mv(orig_haystack, haystack); ++ // result_tmp = tmp4 ++ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); ++ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 ++ mv(tmp3, needle); ++ ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ bind(BCLOOP); ++ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); ++ add(tmp3, tmp3, needle_chr_size); ++ if (!needle_isL) { ++ // ae == StrIntrinsicNode::UU ++ mv(tmp6, ASIZE); ++ bgeu(ch1, tmp6, BCSKIP); ++ } ++ add(tmp4, sp, ch1); ++ sb(ch2, Address(tmp4)); // store skip offset to BC offset table ++ ++ bind(BCSKIP); ++ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 ++ bgtz(ch2, BCLOOP); ++ ++ // tmp6: pattern end, address after needle ++ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); ++ if (needle_isL == haystack_isL) { ++ // load last 8 bytes (8LL/4UU symbols) ++ ld(tmp6, 
Address(tmp6, -wordSize)); + } else { -+ ShouldNotReachHere(); ++ // UL: from UTF-16(source) search Latin1(pattern) ++ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) ++ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d ++ // We'll have to wait until load completed, but it's still faster than per-character loads+checks ++ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a ++ slli(ch2, tmp6, XLEN - 24); ++ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b ++ slli(ch1, tmp6, XLEN - 16); ++ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c ++ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d ++ slli(ch2, ch2, 16); ++ orr(ch2, ch2, ch1); // 0x00000b0c ++ slli(result, tmp3, 48); // use result as temp register ++ orr(tmp6, tmp6, result); // 0x0a00000d ++ slli(result, ch2, 16); ++ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d + } -+ return address(((uintptr_t)insn_addr + offset)); -+} + -+int MacroAssembler::patch_oop(address insn_addr, address o) { -+ // OOPs are either narrow (32 bits) or wide (48 bits). We encode -+ // narrow OOPs by setting the upper 16 bits in the first -+ // instruction. -+ if (NativeInstruction::is_li32_at(insn_addr)) { -+ // Move narrow OOP -+ uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); -+ return patch_imm_in_li32(insn_addr, (int32_t)n); -+ } else if (NativeInstruction::is_movptr_at(insn_addr)) { -+ // Move wide OOP -+ return patch_addr_in_movptr(insn_addr, o); ++ // i = m - 1; ++ // skipch = j + i; ++ // if (skipch == pattern[m - 1] ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // else ++ // move j with bad char offset table ++ bind(BMLOOPSTR2); ++ // compare pattern to source string backward ++ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); ++ (this->*haystack_load_1chr)(skipch, Address(result), noreg); ++ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 ++ if (needle_isL == haystack_isL) { ++ // re-init tmp3. It's for free because it's executed in parallel with ++ // load above. Alternative is to initialize it before loop, but it'll ++ // affect performance on in-order systems with 2 or more ld/st pipelines ++ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] ++ } ++ if (!isLL) { // UU/UL case ++ slli(ch2, nlen_tmp, 1); // offsets in bytes ++ } ++ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char ++ add(result, haystack, isLL ? nlen_tmp : ch2); ++ ld(ch2, Address(result)); // load 8 bytes from source string ++ mv(ch1, tmp6); ++ if (isLL) { ++ j(BMLOOPSTR1_AFTER_LOAD); ++ } else { ++ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. 
cnt1 >= 8 ++ j(BMLOOPSTR1_CMP); + } -+ ShouldNotReachHere(); -+ return -1; -+} + -+void MacroAssembler::reinit_heapbase() { -+ if (UseCompressedOops) { -+ if (Universe::is_fully_initialized()) { -+ mv(xheapbase, CompressedOops::ptrs_base()); ++ bind(BMLOOPSTR1); ++ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ ++ bind(BMLOOPSTR1_AFTER_LOAD); ++ sub(nlen_tmp, nlen_tmp, 1); ++ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); ++ ++ bind(BMLOOPSTR1_CMP); ++ beq(ch1, ch2, BMLOOPSTR1); ++ ++ bind(BMSKIP); ++ if (!isLL) { ++ // if we've met UTF symbol while searching Latin1 pattern, then we can ++ // skip needle_len symbols ++ if (needle_isL != haystack_isL) { ++ mv(result_tmp, needle_len); + } else { -+ int32_t offset = 0; -+ la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset); -+ ld(xheapbase, Address(xheapbase, offset)); ++ mv(result_tmp, 1); + } ++ mv(t0, ASIZE); ++ bgeu(skipch, t0, BMADV); + } -+} ++ add(result_tmp, sp, skipch); ++ lbu(result_tmp, Address(result_tmp)); // load skip offset + -+void MacroAssembler::mv(Register Rd, Address dest) { -+ assert(dest.getMode() == Address::literal, "Address mode should be Address::literal"); -+ code_section()->relocate(pc(), dest.rspec()); -+ movptr(Rd, dest.target()); -+} ++ bind(BMADV); ++ sub(nlen_tmp, needle_len, 1); ++ // move haystack after bad char skip offset ++ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); ++ ble(haystack, haystack_end, BMLOOPSTR2); ++ add(sp, sp, ASIZE); ++ j(NOMATCH); + -+void MacroAssembler::mv(Register Rd, address addr) { -+ // Here in case of use with relocation, use fix length instruciton -+ // movptr instead of li -+ movptr(Rd, addr); -+} ++ bind(BMLOOPSTR1_LASTCMP); ++ bne(ch1, ch2, BMSKIP); + -+void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { -+ if (src.is_register()) { -+ mv(Rd, src.as_register()); ++ bind(BMMATCH); ++ sub(result, haystack, orig_haystack); ++ if (!haystack_isL) { ++ srli(result, result, 1); ++ } ++ add(sp, sp, ASIZE); ++ j(DONE); ++ ++ bind(LINEARSTUB); ++ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm ++ bltz(t0, LINEARSEARCH); ++ mv(result, zr); ++ RuntimeAddress stub = NULL; ++ if (isLL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); ++ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); ++ } else if (needle_isL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); ++ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); + } else { -+ mv(Rd, src.as_constant()); ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); ++ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); + } -+} ++ trampoline_call(stub); ++ j(DONE); + -+void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { -+ andr(Rd, Rs1, Rs2); -+ // addw: The result is clipped to 32 bits, then the sign bit is extended, -+ // and the result is stored in Rd -+ addw(Rd, Rd, zr); -+} ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); + -+void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { -+ orr(Rd, Rs1, Rs2); -+ // addw: The result is clipped to 32 bits, then the sign bit is extended, -+ // and the result is stored in Rd -+ addw(Rd, Rd, zr); -+} ++ 
bind(LINEARSEARCH); ++ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); + -+void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { -+ xorr(Rd, Rs1, Rs2); -+ // addw: The result is clipped to 32 bits, then the sign bit is extended, -+ // and the result is stored in Rd -+ addw(Rd, Rd, zr); ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof"); +} + -+// Note: load_unsigned_short used to be called load_unsigned_word. -+int MacroAssembler::load_unsigned_short(Register dst, Address src) { -+ int off = offset(); -+ lhu(dst, src); -+ return off; -+} ++// string_indexof ++// result: x10 ++// src: x11 ++// src_count: x12 ++// pattern: x13 ++// pattern_count: x14 or 1/2/3/4 ++void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae) ++{ ++ // Note: ++ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant ++ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 ++ assert(needle_con_cnt <= 4, "Invalid needle constant count"); ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + -+int MacroAssembler::load_unsigned_byte(Register dst, Address src) { -+ int off = offset(); -+ lbu(dst, src); -+ return off; -+} ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register hlen_neg = haystack_len, nlen_neg = needle_len; ++ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; + -+int MacroAssembler::load_signed_short(Register dst, Address src) { -+ int off = offset(); -+ lh(dst, src); -+ return off; -+} ++ bool isLL = ae == StrIntrinsicNode::LL; + -+int MacroAssembler::load_signed_byte(Register dst, Address src) { -+ int off = offset(); -+ lb(dst, src); -+ return off; -+} ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; + -+void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { -+ switch (size_in_bytes) { -+ case 8: ld(dst, src); break; -+ case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; -+ case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; -+ case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; -+ default: ShouldNotReachHere(); -+ } -+} ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; ++ load_chr_insn load_4chr = isLL ? 
(load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; + -+void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { -+ switch (size_in_bytes) { -+ case 8: sd(src, dst); break; -+ case 4: sw(src, dst); break; -+ case 2: sh(src, dst); break; -+ case 1: sb(src, dst); break; -+ default: ShouldNotReachHere(); -+ } -+} ++ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; + -+// reverse bytes in halfword in lower 16 bits and sign-extend -+// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) -+void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { -+ if (UseRVB) { -+ rev8(Rd, Rs); -+ srai(Rd, Rd, 48); -+ return; -+ } -+ assert_different_registers(Rs, tmp); -+ assert_different_registers(Rd, tmp); -+ srli(tmp, Rs, 8); -+ andi(tmp, tmp, 0xFF); -+ slli(Rd, Rs, 56); -+ srai(Rd, Rd, 48); // sign-extend -+ orr(Rd, Rd, tmp); -+} ++ Register first = tmp3; + -+// reverse bytes in lower word and sign-extend -+// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) -+void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseRVB) { -+ rev8(Rd, Rs); -+ srai(Rd, Rd, 32); -+ return; -+ } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ revb_h_w_u(Rd, Rs, tmp1, tmp2); -+ slli(tmp2, Rd, 48); -+ srai(tmp2, tmp2, 32); // sign-extend -+ srli(Rd, Rd, 16); -+ orr(Rd, Rd, tmp2); -+} ++ if (needle_con_cnt == -1) { ++ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; + -+// reverse bytes in halfword in lower 16 bits and zero-extend -+// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) -+void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { -+ if (UseRVB) { -+ rev8(Rd, Rs); -+ srli(Rd, Rd, 48); -+ return; -+ } -+ assert_different_registers(Rs, tmp); -+ assert_different_registers(Rd, tmp); -+ srli(tmp, Rs, 8); -+ andi(tmp, tmp, 0xFF); -+ andi(Rd, Rs, 0xFF); -+ slli(Rd, Rd, 8); -+ orr(Rd, Rd, tmp); -+} ++ sub(t0, needle_len, needle_isL == haystack_isL ? 
4 : 2); ++ bltz(t0, DOSHORT); + -+// reverse bytes in halfwords in lower 32 bits and zero-extend -+// Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) -+void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseRVB) { -+ rev8(Rd, Rs); -+ rori(Rd, Rd, 32); -+ roriw(Rd, Rd, 16); -+ zext_w(Rd, Rd); -+ return; -+ } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ srli(tmp2, Rs, 16); -+ revb_h_h_u(tmp2, tmp2, tmp1); -+ revb_h_h_u(Rd, Rs, tmp1); -+ slli(tmp2, tmp2, 16); -+ orr(Rd, Rd, tmp2); -+} ++ (this->*needle_load_1chr)(first, Address(needle), noreg); ++ slli(t0, needle_len, needle_chr_shift); ++ add(needle, needle, t0); ++ neg(nlen_neg, t0); ++ slli(t0, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, t0); ++ neg(hlen_neg, t0); + -+// This method is only used for revb_h -+// Rd = Rs[47:0] Rs[55:48] Rs[63:56] -+void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1); -+ srli(tmp1, Rs, 48); -+ andi(tmp2, tmp1, 0xFF); -+ slli(tmp2, tmp2, 8); -+ srli(tmp1, tmp1, 8); -+ orr(tmp1, tmp1, tmp2); -+ slli(Rd, Rs, 16); -+ orr(Rd, Rd, tmp1); -+} ++ bind(FIRST_LOOP); ++ add(t0, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); ++ beq(first, ch2, STR1_LOOP); + -+// reverse bytes in each halfword -+// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] -+void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseRVB) { -+ assert_different_registers(Rs, tmp1); -+ assert_different_registers(Rd, tmp1); -+ rev8(Rd, Rs); -+ zext_w(tmp1, Rd); -+ roriw(tmp1, tmp1, 16); -+ slli(tmp1, tmp1, 32); -+ srli(Rd, Rd, 32); -+ roriw(Rd, Rd, 16); -+ zext_w(Rd, Rd); -+ orr(Rd, Rd, tmp1); -+ return; -+ } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ revb_h_helper(Rd, Rs, tmp1, tmp2); -+ for (int i = 0; i < 3; ++i) { -+ revb_h_helper(Rd, Rd, tmp1, tmp2); -+ } -+} ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); + -+// reverse bytes in each word -+// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] -+void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseRVB) { -+ rev8(Rd, Rs); -+ rori(Rd, Rd, 32); -+ return; -+ } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ revb(Rd, Rs, tmp1, tmp2); -+ ror_imm(Rd, Rd, 32); -+} ++ bind(STR1_LOOP); ++ add(nlen_tmp, nlen_neg, needle_chr_size); ++ add(hlen_tmp, hlen_neg, haystack_chr_size); ++ bgez(nlen_tmp, MATCH); + -+// reverse bytes in doubleword -+// Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] -+void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseRVB) { -+ rev8(Rd, Rs); -+ return; -+ } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ andi(tmp1, Rs, 0xFF); -+ slli(tmp1, tmp1, 8); -+ for (int step = 8; step < 56; step += 8) { -+ srli(tmp2, Rs, step); -+ andi(tmp2, tmp2, 0xFF); -+ orr(tmp1, tmp1, tmp2); -+ slli(tmp1, tmp1, 8); -+ } -+ srli(Rd, Rs, 56); -+ andi(Rd, Rd, 0xFF); -+ orr(Rd, tmp1, Rd); -+} ++ bind(STR1_NEXT); ++ add(ch1, needle, nlen_tmp); ++ 
(this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ add(nlen_tmp, nlen_tmp, needle_chr_size); ++ add(hlen_tmp, hlen_tmp, haystack_chr_size); ++ bltz(nlen_tmp, STR1_NEXT); ++ j(MATCH); + -+// rotate right with shift bits -+void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) -+{ -+ if (UseRVB) { -+ rori(dst, src, shift); -+ return; ++ bind(DOSHORT); ++ if (needle_isL == haystack_isL) { ++ sub(t0, needle_len, 2); ++ bltz(t0, DO1); ++ bgtz(t0, DO3); ++ } + } + -+ assert_different_registers(dst, tmp); -+ assert_different_registers(src, tmp); -+ assert(shift < 64, "shift amount must be < 64"); -+ slli(tmp, src, 64 - shift); -+ srli(dst, src, shift); -+ orr(dst, dst, tmp); -+} ++ if (needle_con_cnt == 4) { ++ Label CH1_LOOP; ++ (this->*load_4chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 4); ++ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { -+ if (is_imm_in_range(imm, 12, 0)) { -+ and_imm12(Rd, Rn, imm); -+ } else { -+ assert_different_registers(Rn, tmp); -+ li(tmp, imm); -+ andr(Rd, Rn, tmp); ++ bind(CH1_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_4chr)(ch2, Address(ch2), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); + } -+} + -+void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { -+ ld(tmp1, adr); -+ if (src.is_register()) { -+ orr(tmp1, tmp1, src.as_register()); -+ } else { -+ if (is_imm_in_range(src.as_constant(), 12, 0)) { -+ ori(tmp1, tmp1, src.as_constant()); -+ } else { -+ assert_different_registers(tmp1, tmp2); -+ li(tmp2, src.as_constant()); -+ orr(tmp1, tmp1, tmp2); ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { ++ Label CH1_LOOP; ++ BLOCK_COMMENT("string_indexof DO2 {"); ++ bind(DO2); ++ (this->*load_2chr)(ch1, Address(needle), noreg); ++ if (needle_con_cnt == 2) { ++ sub(result_tmp, haystack_len, 2); + } -+ } -+ sd(tmp1, adr); -+} ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { -+ if (UseCompressedClassPointers) { -+ lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); -+ if (CompressedKlassPointers::base() == NULL) { -+ slli(tmp, tmp, CompressedKlassPointers::shift()); -+ beq(trial_klass, tmp, L); -+ return; -+ } -+ decode_klass_not_null(tmp); -+ } else { -+ ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); ++ bind(CH1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ BLOCK_COMMENT("} string_indexof DO2"); + } -+ beq(trial_klass, tmp, L); -+} + -+// Move an oop into a register. immediate is true if we want -+// immediate instructions and nmethod entry barriers are not enabled. -+// i.e. we are not going to patch this instruction while the code is being -+// executed by another thread. 
-+void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { -+ int oop_index; -+ if (obj == NULL) { -+ oop_index = oop_recorder()->allocate_oop_index(obj); -+ } else { -+#ifdef ASSERT -+ { -+ ThreadInVMfromUnknown tiv; -+ assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); -+ } -+#endif -+ oop_index = oop_recorder()->find_index(obj); -+ } -+ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { ++ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; ++ BLOCK_COMMENT("string_indexof DO3 {"); + -+ // nmethod entry barrier necessitate using the constant pool. They have to be -+ // ordered with respected to oop access. -+ // Using immediate literals would necessitate fence.i. -+ if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) { -+ address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address -+ ld_constant(dst, Address(dummy, rspec)); -+ } else -+ mv(dst, Address((address)obj, rspec)); -+} ++ bind(DO3); ++ (this->*load_2chr)(first, Address(needle), noreg); ++ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); ++ if (needle_con_cnt == 3) { ++ sub(result_tmp, haystack_len, 3); ++ } ++ slli(hlen_tmp, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, hlen_tmp); ++ neg(hlen_neg, hlen_tmp); + -+// Move a metadata address into a register. -+void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { -+ int oop_index; -+ if (obj == NULL) { -+ oop_index = oop_recorder()->allocate_metadata_index(obj); -+ } else { -+ oop_index = oop_recorder()->find_index(obj); -+ } -+ RelocationHolder rspec = metadata_Relocation::spec(oop_index); -+ mv(dst, Address((address)obj, rspec)); -+} ++ bind(FIRST_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(ch2), noreg); ++ beq(first, ch2, STR1_LOOP); + -+// Writes to stack successive pages until offset reached to check for -+// stack overflow + shadow pages. This clobbers tmp. -+void MacroAssembler::bang_stack_size(Register size, Register tmp) { -+ assert_different_registers(tmp, size, t0); -+ // Bang stack for total size given plus shadow page size. -+ // Bang one page at a time because large size can bang beyond yellow and -+ // red zones. -+ mv(t0, os::vm_page_size()); -+ Label loop; -+ bind(loop); -+ sub(tmp, sp, t0); -+ subw(size, size, t0); -+ sd(size, Address(tmp)); -+ bgtz(size, loop); ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); + -+ // Bang down shadow pages too. -+ // At this point, (tmp-0) is the last address touched, so don't -+ // touch it again. (It was touched as (tmp-pagesize) but then tmp -+ // was post-decremented.) Skip this address by starting at i=1, and -+ // touch a few more pages below. N.B. It is important to touch all -+ // the way down to and including i=StackShadowPages. -+ for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { -+ // this could be any sized move but this is can be a debugging crumb -+ // so the bigger the better. 
-+ sub(tmp, tmp, os::vm_page_size()); -+ sd(size, Address(tmp, 0)); ++ bind(STR1_LOOP); ++ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ j(MATCH); ++ BLOCK_COMMENT("} string_indexof DO3"); + } -+} -+ -+SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { -+ assert_cond(masm != NULL); -+ int32_t offset = 0; -+ _masm = masm; -+ _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset); -+ _masm->lbu(t0, Address(t0, offset)); -+ _masm->beqz(t0, _label); -+} -+ -+SkipIfEqual::~SkipIfEqual() { -+ assert_cond(_masm != NULL); -+ _masm->bind(_label); -+ _masm = NULL; -+} -+ -+void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) { -+ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); -+ ld(dst, Address(xmethod, Method::const_offset())); -+ ld(dst, Address(dst, ConstMethod::constants_offset())); -+ ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes())); -+ ld(dst, Address(dst, mirror_offset)); -+ resolve_oop_handle(dst, tmp); -+} -+ -+void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { -+ // OopHandle::resolve is an indirection. -+ assert_different_registers(result, tmp); -+ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); -+} -+ -+// ((WeakHandle)result).resolve() -+void MacroAssembler::resolve_weak_handle(Register result, Register tmp) { -+ assert_different_registers(result, tmp); -+ Label resolved; + -+ // A null weak handle resolves to null. -+ beqz(result, resolved); ++ if (needle_con_cnt == -1 || needle_con_cnt == 1) { ++ Label DO1_LOOP; + -+ // Only 64 bit platforms support GCs that require a tmp register -+ // Only IN_HEAP loads require a thread_tmp register -+ // WeakHandle::resolve is an indirection like jweak. 
-+ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, -+ result, Address(result), tmp, noreg /* tmp_thread */); -+ bind(resolved); -+} ++ BLOCK_COMMENT("string_indexof DO1 {"); ++ bind(DO1); ++ (this->*needle_load_1chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 1); ++ mv(tmp3, result_tmp); ++ if (haystack_chr_shift) { ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ } ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, -+ Register dst, Address src, -+ Register tmp1, Register thread_tmp) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ decorators = AccessInternal::decorator_fixup(decorators); -+ bool as_raw = (decorators & AS_RAW) != 0; -+ if (as_raw) { -+ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); -+ } else { -+ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ bind(DO1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, DO1_LOOP); ++ BLOCK_COMMENT("} string_indexof DO1"); + } -+} + -+void MacroAssembler::null_check(Register reg, int offset) { -+ if (needs_explicit_null_check(offset)) { -+ // provoke OS NULL exception if reg = NULL by -+ // accessing M[reg] w/o changing any registers -+ // NOTE: this is plenty to provoke a segv -+ ld(zr, Address(reg, 0)); -+ } else { -+ // nothing to do, (later) access of M[reg + offset] -+ // will provoke OS NULL exception if reg = NULL -+ } -+} ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); + -+void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, -+ Address dst, Register src, -+ Register tmp1, Register thread_tmp) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ decorators = AccessInternal::decorator_fixup(decorators); -+ bool as_raw = (decorators & AS_RAW) != 0; -+ if (as_raw) { -+ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp); -+ } else { -+ bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp); -+ } -+} ++ bind(MATCH); ++ srai(t0, hlen_neg, haystack_chr_shift); ++ add(result, result_tmp, t0); + -+// Algorithm must match CompressedOops::encode. -+void MacroAssembler::encode_heap_oop(Register d, Register s) { -+ verify_oop(s, "broken oop in encode_heap_oop"); -+ if (CompressedOops::base() == NULL) { -+ if (CompressedOops::shift() != 0) { -+ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); -+ srli(d, s, LogMinObjAlignmentInBytes); -+ } else { -+ mv(d, s); -+ } -+ } else { -+ Label notNull; -+ sub(d, s, xheapbase); -+ bgez(d, notNull); -+ mv(d, zr); -+ bind(notNull); -+ if (CompressedOops::shift() != 0) { -+ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); -+ srli(d, d, CompressedOops::shift()); -+ } -+ } ++ bind(DONE); +} + -+void MacroAssembler::load_klass(Register dst, Register src) { -+ if (UseCompressedClassPointers) { -+ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); -+ decode_klass_not_null(dst); -+ } else { -+ ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); -+ } -+} ++// Compare strings. 
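string_indexof_linearscan above keeps both scan positions as negative offsets that are counted up toward zero, so the loop-termination tests are plain sign checks (blez/bgez) rather than extra compares. A plain C++ reference of the scan it performs, for illustration only (indexof_reference is not part of the generated code):

// Reference semantics of the linear scan: j walks the haystack, k the needle;
// in the assembly both are kept as negative offsets relative to the end of
// their valid ranges so "still in range" is simply "offset <= 0".
static int indexof_reference(const char* hay, int n, const char* nee, int m) {
  for (int j = 0; j + m <= n; j++) {
    int k = 0;
    while (k < m && hay[j + k] == nee[k]) {
      k++;
    }
    if (k == m) {
      return j;          // MATCH: index of the first occurrence
    }
  }
  return -1;             // NOMATCH
}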
++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, ++ Register tmp3, int ae) ++{ ++ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, ++ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, ++ SHORT_LOOP_START, TAIL_CHECK, L; + -+void MacroAssembler::store_klass(Register dst, Register src) { -+ // FIXME: Should this be a store release? concurrent gcs assumes -+ // klass length is valid if klass field is not null. -+ if (UseCompressedClassPointers) { -+ encode_klass_not_null(src); -+ sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); -+ } else { -+ sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); -+ } -+} ++ const int STUB_THRESHOLD = 64 + 8; ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; + -+void MacroAssembler::store_klass_gap(Register dst, Register src) { -+ if (UseCompressedClassPointers) { -+ // Store to klass gap in destination -+ sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); -+ } -+} ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; + -+void MacroAssembler::decode_klass_not_null(Register r) { -+ decode_klass_not_null(r, r); -+} ++ // for L strings, 1 byte for 1 character ++ // for U strings, 2 bytes for 1 character ++ int str1_chr_size = str1_isL ? 1 : 2; ++ int str2_chr_size = str2_isL ? 1 : 2; ++ int minCharsInWord = isLL ? wordSize : wordSize / 2; + -+void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { -+ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; + -+ if (CompressedKlassPointers::base() == NULL) { -+ if (CompressedKlassPointers::shift() != 0) { -+ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ slli(dst, src, LogKlassAlignmentInBytes); -+ } else { -+ mv(dst, src); -+ } -+ return; -+ } ++ BLOCK_COMMENT("string_compare {"); + -+ Register xbase = dst; -+ if (dst == src) { -+ xbase = tmp; ++ // Bizzarely, the counts are passed in bytes, regardless of whether they ++ // are L or U strings, however the result is always in characters. ++ if (!str1_isL) { ++ sraiw(cnt1, cnt1, 1); + } -+ -+ assert_different_registers(src, xbase); -+ li(xbase, (uintptr_t)CompressedKlassPointers::base()); -+ -+ if (CompressedKlassPointers::shift() != 0) { -+ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ assert_different_registers(t0, xbase); -+ shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); -+ } else { -+ add(dst, xbase, src); ++ if (!str2_isL) { ++ sraiw(cnt2, cnt2, 1); + } + -+ if (xbase == xheapbase) { reinit_heapbase(); } -+} -+ -+void MacroAssembler::encode_klass_not_null(Register r) { -+ encode_klass_not_null(r, r); -+} -+ -+void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { -+ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ // Compute the minimum of the string lengths and save the difference in result. 
++ sub(result, cnt1, cnt2); ++ bgt(cnt1, cnt2, L); ++ mv(cnt2, cnt1); ++ bind(L); + -+ if (CompressedKlassPointers::base() == NULL) { -+ if (CompressedKlassPointers::shift() != 0) { -+ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ srli(dst, src, LogKlassAlignmentInBytes); -+ } else { -+ mv(dst, src); -+ } -+ return; -+ } ++ // A very short string ++ li(t0, minCharsInWord); ++ ble(cnt2, t0, SHORT_STRING); + -+ if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && -+ CompressedKlassPointers::shift() == 0) { -+ zero_extend(dst, src, 32); -+ return; -+ } -+ -+ Register xbase = dst; -+ if (dst == src) { -+ xbase = tmp; -+ } ++ // Compare longwords ++ // load first parts of strings and finish initialization while loading ++ { ++ if (str1_isL == str2_isL) { // LL or UU ++ // load 8 bytes once to compare ++ ld(tmp1, Address(str1)); ++ beq(str1, str2, DONE); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ sub(cnt2, cnt2, minCharsInWord); ++ beqz(cnt2, TAIL_CHECK); ++ // convert cnt2 from characters to bytes ++ if (!str1_isL) { ++ slli(cnt2, cnt2, 1); ++ } ++ add(str2, str2, cnt2); ++ add(str1, str1, cnt2); ++ sub(cnt2, zr, cnt2); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ add(str1, str1, cnt2); ++ sub(cnt1, zr, cnt2); ++ slli(cnt2, cnt2, 1); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 4); ++ } else { // UL case ++ ld(tmp1, Address(str1)); ++ lwu(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ slli(t0, cnt2, 1); ++ sub(cnt1, zr, t0); ++ add(str1, str1, t0); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 8); ++ } ++ addi(cnt2, cnt2, isUL ? 4 : 8); ++ bgez(cnt2, TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); + -+ assert_different_registers(src, xbase); -+ li(xbase, (intptr_t)CompressedKlassPointers::base()); -+ sub(dst, src, xbase); -+ if (CompressedKlassPointers::shift() != 0) { -+ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ srli(dst, dst, LogKlassAlignmentInBytes); -+ } -+ if (xbase == xheapbase) { -+ reinit_heapbase(); -+ } -+} ++ // main loop ++ bind(NEXT_WORD); ++ if (str1_isL == str2_isL) { // LL or UU ++ add(t0, str1, cnt2); ++ ld(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt2, cnt2, 8); ++ } else if (isLU) { // LU case ++ add(t0, str1, cnt1); ++ lwu(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt1, cnt1, 4); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ addi(cnt2, cnt2, 8); ++ } else { // UL case ++ add(t0, str2, cnt2); ++ lwu(tmp2, Address(t0)); ++ add(t0, str1, cnt1); ++ ld(tmp1, Address(t0)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ addi(cnt1, cnt1, 8); ++ addi(cnt2, cnt2, 4); ++ } ++ bgez(cnt2, TAIL); + -+void MacroAssembler::decode_heap_oop_not_null(Register r) { -+ decode_heap_oop_not_null(r, r); -+} ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, NEXT_WORD); ++ j(DIFFERENCE); ++ bind(TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. 
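In the LU/UL paths above, the narrower (Latin-1) side is widened with inflate_lo32 before each word compare. Assuming inflate_lo32 zero-extends each of the four low-order bytes into a 16-bit code unit, its effect is roughly the following sketch (inflate_lo32_ref is illustrative only):

#include <cstdint>

// Widen four Latin-1 bytes into four UTF-16 code units (little-endian order),
// so an 8-byte word of the UTF-16 string can be compared in one operation.
// Assumed behaviour of inflate_lo32; e.g. 0x44332211 -> 0x0044003300220011.
static inline uint64_t inflate_lo32_ref(uint32_t latin1) {
  uint64_t w = 0;
  for (int i = 0; i < 4; i++) {
    w |= (uint64_t)((latin1 >> (8 * i)) & 0xff) << (16 * i);
  }
  return w;
}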
++ if (str1_isL == str2_isL) { // LL or UU ++ ld(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ } else { // UL case ++ lwu(tmp2, Address(str2)); ++ ld(tmp1, Address(str1)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ } ++ bind(TAIL_CHECK); ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, DONE); + -+void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { -+ assert(UseCompressedOops, "should only be used for compressed headers"); -+ assert(Universe::heap() != NULL, "java heap should be initialized"); -+ // Cannot assert, unverified entry point counts instructions (see .ad file) -+ // vtableStubs also counts instructions in pd_code_size_limit. -+ // Also do not verify_oop as this is called by verify_oop. -+ if (CompressedOops::shift() != 0) { -+ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); -+ slli(dst, src, LogMinObjAlignmentInBytes); -+ if (CompressedOops::base() != NULL) { -+ add(dst, xheapbase, dst); ++ // Find the first different characters in the longwords and ++ // compute their difference. ++ bind(DIFFERENCE); ++ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb ++ srl(tmp1, tmp1, result); ++ srl(tmp2, tmp2, result); ++ if (isLL) { ++ andi(tmp1, tmp1, 0xFF); ++ andi(tmp2, tmp2, 0xFF); ++ } else { ++ andi(tmp1, tmp1, 0xFFFF); ++ andi(tmp2, tmp2, 0xFFFF); + } -+ } else { -+ assert(CompressedOops::base() == NULL, "sanity"); -+ mv(dst, src); ++ sub(result, tmp1, tmp2); ++ j(DONE); + } -+} + -+void MacroAssembler::decode_heap_oop(Register d, Register s) { -+ if (CompressedOops::base() == NULL) { -+ if (CompressedOops::shift() != 0 || d != s) { -+ slli(d, s, CompressedOops::shift()); -+ } -+ } else { -+ Label done; -+ mv(d, s); -+ beqz(s, done); -+ shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); -+ bind(done); ++ bind(STUB); ++ RuntimeAddress stub = NULL; ++ switch (ae) { ++ case StrIntrinsicNode::LL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); ++ break; ++ case StrIntrinsicNode::UU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); ++ break; ++ case StrIntrinsicNode::LU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); ++ break; ++ case StrIntrinsicNode::UL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); ++ break; ++ default: ++ ShouldNotReachHere(); + } -+ verify_oop(d, "broken oop in decode_heap_oop"); -+} ++ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); ++ trampoline_call(stub); ++ j(DONE); + -+void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, -+ Register thread_tmp, DecoratorSet decorators) { -+ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); -+} ++ bind(SHORT_STRING); ++ // Is the minimum length zero? 
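The DIFFERENCE block above locates the first mismatching character from the XOR of the two words. Assuming ctzc_bit yields the trailing-zero count rounded down to a character boundary, the scalar equivalent is roughly this sketch (first_diff is illustrative only):

#include <cstdint>

// Find the first differing character in two equal-width words and return
// their signed difference (same convention as 'result' above).
static int first_diff(uint64_t a, uint64_t b, bool latin1) {
  uint64_t x = a ^ b;                          // non-zero on entry
  int bit = __builtin_ctzll(x);                // lowest differing bit
  bit &= latin1 ? ~7 : ~15;                    // round down to an 8/16-bit slot
  int mask = latin1 ? 0xFF : 0xFFFF;
  return (int)((a >> bit) & mask) - (int)((b >> bit) & mask);
}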
++ beqz(cnt2, DONE); ++ // arrange code to do most branches while loading and loading next characters ++ // while comparing previous ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ j(SHORT_LOOP_START); ++ bind(SHORT_LOOP); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST); ++ bind(SHORT_LOOP_START); ++ (this->*str1_load_chr)(tmp2, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(t0, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bne(tmp1, cnt1, SHORT_LOOP_TAIL); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST2); ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ beq(tmp2, t0, SHORT_LOOP); ++ sub(result, tmp2, t0); ++ j(DONE); ++ bind(SHORT_LOOP_TAIL); ++ sub(result, tmp1, cnt1); ++ j(DONE); ++ bind(SHORT_LAST2); ++ beq(tmp2, t0, DONE); ++ sub(result, tmp2, t0); + -+void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, -+ Register thread_tmp, DecoratorSet decorators) { -+ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); -+} ++ j(DONE); ++ bind(SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bind(SHORT_LAST); ++ beq(tmp1, cnt1, DONE); ++ sub(result, tmp1, cnt1); + -+void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, -+ Register thread_tmp, DecoratorSet decorators) { -+ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, thread_tmp); -+} ++ bind(DONE); + -+// Used for storing NULLs. -+void MacroAssembler::store_heap_oop_null(Address dst) { -+ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++ BLOCK_COMMENT("} string_compare"); +} + -+int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, -+ bool want_remainder) -+{ -+ // Full implementation of Java idiv and irem. The function -+ // returns the (pc) offset of the div instruction - may be needed -+ // for implicit exceptions. -+ // -+ // input : rs1: dividend -+ // rs2: divisor -+ // -+ // result: either -+ // quotient (= rs1 idiv rs2) -+ // remainder (= rs1 irem rs2) -+ ++void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, ++ Register tmp4, Register tmp5, Register tmp6, Register result, ++ Register cnt1, int elem_size) { ++ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ Register cnt2 = tmp2; // cnt2 only used in array length compare ++ Register elem_per_word = tmp6; ++ int log_elem_size = exact_log2(elem_size); ++ int length_offset = arrayOopDesc::length_offset_in_bytes(); ++ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); + -+ int idivl_offset = offset(); -+ if (!want_remainder) { -+ divw(result, rs1, rs2); -+ } else { -+ remw(result, rs1, rs2); // result = rs1 % rs2; -+ } -+ return idivl_offset; -+} ++ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); ++ li(elem_per_word, wordSize / elem_size); + -+int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, -+ bool want_remainder) -+{ -+ // Full implementation of Java ldiv and lrem. 
The function -+ // returns the (pc) offset of the div instruction - may be needed -+ // for implicit exceptions. -+ // -+ // input : rs1: dividend -+ // rs2: divisor -+ // -+ // result: either -+ // quotient (= rs1 idiv rs2) -+ // remainder (= rs1 irem rs2) ++ BLOCK_COMMENT("arrays_equals {"); + -+ int idivq_offset = offset(); -+ if (!want_remainder) { -+ div(result, rs1, rs2); -+ } else { -+ rem(result, rs1, rs2); // result = rs1 % rs2; -+ } -+ return idivq_offset; -+} ++ // if (a1 == a2), return true ++ beq(a1, a2, SAME); + -+// Look up the method for a megamorpic invkkeinterface call. -+// The target method is determined by . -+// The receiver klass is in recv_klass. -+// On success, the result will be in method_result, and execution falls through. -+// On failure, execution transfers to the given label. -+void MacroAssembler::lookup_interface_method(Register recv_klass, -+ Register intf_klass, -+ RegisterOrConstant itable_index, -+ Register method_result, -+ Register scan_tmp, -+ Label& L_no_such_interface, -+ bool return_method) { -+ assert_different_registers(recv_klass, intf_klass, scan_tmp); -+ assert_different_registers(method_result, intf_klass, scan_tmp); -+ assert(recv_klass != method_result || !return_method, -+ "recv_klass can be destroyed when mehtid isn't needed"); -+ assert(itable_index.is_constant() || itable_index.as_register() == method_result, -+ "caller must be same register for non-constant itable index as for method"); ++ mv(result, false); ++ beqz(a1, DONE); ++ beqz(a2, DONE); ++ lwu(cnt1, Address(a1, length_offset)); ++ lwu(cnt2, Address(a2, length_offset)); ++ bne(cnt2, cnt1, DONE); ++ beqz(cnt1, SAME); + -+ // Compute start of first itableOffsetEntry (which is at the end of the vtable). -+ int vtable_base = in_bytes(Klass::vtable_start_offset()); -+ int itentry_off = itableMethodEntry::method_offset_in_bytes(); -+ int scan_step = itableOffsetEntry::size() * wordSize; -+ int vte_size = vtableEntry::size_in_bytes(); -+ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ slli(tmp5, cnt1, 3 + log_elem_size); ++ sub(tmp5, zr, tmp5); ++ add(a1, a1, base_offset); ++ add(a2, a2, base_offset); ++ ld(tmp3, Address(a1, 0)); ++ ld(tmp4, Address(a2, 0)); ++ ble(cnt1, elem_per_word, SHORT); // short or same + -+ lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset())); ++ // Main 16 byte comparison loop with 2 exits ++ bind(NEXT_DWORD); { ++ ld(tmp1, Address(a1, wordSize)); ++ ld(tmp2, Address(a2, wordSize)); ++ sub(cnt1, cnt1, 2 * wordSize / elem_size); ++ blez(cnt1, TAIL); ++ bne(tmp3, tmp4, DONE); ++ ld(tmp3, Address(a1, 2 * wordSize)); ++ ld(tmp4, Address(a2, 2 * wordSize)); ++ add(a1, a1, 2 * wordSize); ++ add(a2, a2, 2 * wordSize); ++ ble(cnt1, elem_per_word, TAIL2); ++ } beq(tmp1, tmp2, NEXT_DWORD); ++ j(DONE); + -+ // %%% Could store the aligned, prescaled offset in the klassoop. -+ shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3); -+ add(scan_tmp, scan_tmp, vtable_base); ++ bind(TAIL); ++ xorr(tmp4, tmp3, tmp4); ++ xorr(tmp2, tmp1, tmp2); ++ sll(tmp2, tmp2, tmp5); ++ orr(tmp5, tmp4, tmp2); ++ j(IS_TMP5_ZR); + -+ if (return_method) { -+ // Adjust recv_klass by scaled itable_index, so we can free itable_index. 
-+ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); -+ if (itable_index.is_register()) { -+ slli(t0, itable_index.as_register(), 3); -+ } else { -+ li(t0, itable_index.as_constant() << 3); -+ } -+ add(recv_klass, recv_klass, t0); -+ if (itentry_off) { -+ add(recv_klass, recv_klass, itentry_off); -+ } -+ } ++ bind(TAIL2); ++ bne(tmp1, tmp2, DONE); + -+ Label search, found_method; ++ bind(SHORT); ++ xorr(tmp4, tmp3, tmp4); ++ sll(tmp5, tmp4, tmp5); + -+ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); -+ beq(intf_klass, method_result, found_method); -+ bind(search); -+ // Check that the previous entry is non-null. A null entry means that -+ // the receiver class doens't implement the interface, and wasn't the -+ // same as when the caller was compiled. -+ beqz(method_result, L_no_such_interface, /* is_far */ true); -+ addi(scan_tmp, scan_tmp, scan_step); -+ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); -+ bne(intf_klass, method_result, search); ++ bind(IS_TMP5_ZR); ++ bnez(tmp5, DONE); + -+ bind(found_method); ++ bind(SAME); ++ mv(result, true); ++ // That's it. ++ bind(DONE); + -+ // Got a hit. -+ if (return_method) { -+ lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes())); -+ add(method_result, recv_klass, scan_tmp); -+ ld(method_result, Address(method_result)); -+ } ++ BLOCK_COMMENT("} array_equals"); +} + -+// virtual method calling -+void MacroAssembler::lookup_virtual_method(Register recv_klass, -+ RegisterOrConstant vtable_index, -+ Register method_result) { -+ const int base = in_bytes(Klass::vtable_start_offset()); -+ assert(vtableEntry::size() * wordSize == 8, -+ "adjust the scaling in the code below"); -+ int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes(); ++// Compare Strings + -+ if (vtable_index.is_register()) { -+ shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord); -+ ld(method_result, Address(method_result, vtable_offset_in_bytes)); -+ } else { -+ vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; -+ ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes)); -+ } -+} ++// For Strings we're passed the address of the first characters in a1 ++// and a2 and the length in cnt1. ++// elem_size is the element size in bytes: either 1 or 2. ++// There are two implementations. For arrays >= 8 bytes, all ++// comparisons (including the final one, which may overlap) are ++// performed 8 bytes at a time. For strings < 8 bytes, we compare a ++// halfword, then a short, and then a byte. + -+void MacroAssembler::membar(uint32_t order_constraint) { -+ address prev = pc() - NativeMembar::instruction_size; -+ address last = code()->last_insn(); ++void MacroAssembler::string_equals(Register a1, Register a2, ++ Register result, Register cnt1, int elem_size) ++{ ++ Label SAME, DONE, SHORT, NEXT_WORD; ++ Register tmp1 = t0; ++ Register tmp2 = t1; + -+ if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) { -+ NativeMembar *bar = NativeMembar_at(prev); -+ // We are merging two memory barrier instructions. On RISCV we -+ // can do this simply by ORing them together. 
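The header comment above describes the key trick used by string_equals below (and by the TAIL path of string_compare above): once at least 8 bytes remain, the final word is simply re-read from the end of both buffers, re-comparing a few already-checked bytes instead of computing an exact tail length. A scalar sketch, assuming len >= 8 (bytes_equal_overlapping is illustrative only):

#include <cstdint>
#include <cstring>

// Word-at-a-time equality with an overlapping final load; assumes len >= 8
// (shorter inputs take the byte/halfword tail path instead).
static bool bytes_equal_overlapping(const char* a, const char* b, size_t len) {
  uint64_t wa, wb;
  size_t i = 0;
  for (; i + 8 <= len; i += 8) {           // full 8-byte words
    std::memcpy(&wa, a + i, 8);
    std::memcpy(&wb, b + i, 8);
    if (wa != wb) return false;
  }
  if (i == len) return true;
  std::memcpy(&wa, a + len - 8, 8);        // final word, may overlap bytes
  std::memcpy(&wb, b + len - 8, 8);        // that were already compared
  return wa == wb;
}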
-+ bar->set_kind(bar->get_kind() | order_constraint); -+ BLOCK_COMMENT("merged membar"); -+ } else { -+ code()->set_last_insn(pc()); ++ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1); + -+ uint32_t predecessor = 0; -+ uint32_t successor = 0; ++ BLOCK_COMMENT("string_equals {"); + -+ membar_mask_to_pred_succ(order_constraint, predecessor, successor); -+ fence(predecessor, successor); -+ } -+} ++ mv(result, false); + -+// Form an addres from base + offset in Rd. Rd my or may not -+// actually be used: you must use the Address that is returned. It -+// is up to you to ensure that the shift provided mathces the size -+// of your data. -+Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) { -+ if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12 -+ return Address(base, byte_offset); -+ } ++ // Check for short strings, i.e. smaller than wordSize. ++ sub(cnt1, cnt1, wordSize); ++ bltz(cnt1, SHORT); + -+ // Do it the hard way -+ mv(Rd, byte_offset); -+ add(Rd, base, Rd); -+ return Address(Rd); -+} ++ // Main 8 byte comparison loop. ++ bind(NEXT_WORD); { ++ ld(tmp1, Address(a1, 0)); ++ add(a1, a1, wordSize); ++ ld(tmp2, Address(a2, 0)); ++ add(a2, a2, wordSize); ++ sub(cnt1, cnt1, wordSize); ++ bne(tmp1, tmp2, DONE); ++ } bgtz(cnt1, NEXT_WORD); + -+void MacroAssembler::check_klass_subtype(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label& L_success) { -+ Label L_failure; -+ check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL); -+ check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL); -+ bind(L_failure); -+} ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when ++ // length == 4. ++ add(tmp1, a1, cnt1); ++ ld(tmp1, Address(tmp1, 0)); ++ add(tmp2, a2, cnt1); ++ ld(tmp2, Address(tmp2, 0)); ++ bne(tmp1, tmp2, DONE); ++ j(SAME); + -+void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { -+ ld(t0, Address(xthread, JavaThread::polling_word_offset())); -+ if (acquire) { -+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ } -+ if (at_return) { -+ bgtu(in_nmethod ? sp : fp, t0, slow_path, true /* is_far */); -+ } else { -+ andi(t0, t0, SafepointMechanism::poll_bit()); -+ bnez(t0, slow_path, true /* is_far */); -+ } -+} ++ bind(SHORT); ++ Label TAIL03, TAIL01; + -+void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, -+ Label &succeed, Label *fail) { -+ // oldv holds comparison value -+ // newv holds value to write in exchange -+ // addr identifies memory word to compare against/update -+ Label retry_load, nope; -+ bind(retry_load); -+ // Load reserved from the memory location -+ lr_d(tmp, addr, Assembler::aqrl); -+ // Fail and exit if it is not what we expect -+ bne(tmp, oldv, nope); -+ // If the store conditional succeeds, tmp will be zero -+ sc_d(tmp, newv, addr, Assembler::rl); -+ beqz(tmp, succeed); -+ // Retry only when the store conditional failed -+ j(retry_load); ++ // 0-7 bytes left. 
++ andi(t0, cnt1, 4); ++ beqz(t0, TAIL03); ++ { ++ lwu(tmp1, Address(a1, 0)); ++ add(a1, a1, 4); ++ lwu(tmp2, Address(a2, 0)); ++ add(a2, a2, 4); ++ bne(tmp1, tmp2, DONE); ++ } + -+ bind(nope); -+ membar(AnyAny); -+ mv(oldv, tmp); -+ if (fail != NULL) { -+ j(*fail); ++ bind(TAIL03); ++ // 0-3 bytes left. ++ andi(t0, cnt1, 2); ++ beqz(t0, TAIL01); ++ { ++ lhu(tmp1, Address(a1, 0)); ++ add(a1, a1, 2); ++ lhu(tmp2, Address(a2, 0)); ++ add(a2, a2, 2); ++ bne(tmp1, tmp2, DONE); ++ } ++ ++ bind(TAIL01); ++ if (elem_size == 1) { // Only needed when comparing 1-byte elements ++ // 0-1 bytes left. ++ andi(t0, cnt1, 1); ++ beqz(t0, SAME); ++ { ++ lbu(tmp1, a1, 0); ++ lbu(tmp2, a2, 0); ++ bne(tmp1, tmp2, DONE); ++ } + } ++ ++ // Arrays are equal. ++ bind(SAME); ++ mv(result, true); ++ ++ // That's it. ++ bind(DONE); ++ BLOCK_COMMENT("} string_equals"); +} + -+void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, -+ Label &succeed, Label *fail) { -+ assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); -+ cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); ++typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); ++typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, ++ bool is_far, bool is_unordered); ++ ++static conditional_branch_insn conditional_branches[] = ++{ ++ /* SHORT branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgt, ++ NULL, // BoolTest::overflow ++ (conditional_branch_insn)&Assembler::blt, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::ble, ++ NULL, // BoolTest::no_overflow ++ (conditional_branch_insn)&Assembler::bge, ++ ++ /* UNSIGNED branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgtu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bltu, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::bleu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bgeu ++}; ++ ++static float_conditional_branch_insn float_conditional_branches[] = ++{ ++ /* FLOAT SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::float_beq, ++ (float_conditional_branch_insn)&MacroAssembler::float_bgt, ++ NULL, // BoolTest::overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_blt, ++ (float_conditional_branch_insn)&MacroAssembler::float_bne, ++ (float_conditional_branch_insn)&MacroAssembler::float_ble, ++ NULL, // BoolTest::no_overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_bge, ++ ++ /* DOUBLE SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::double_beq, ++ (float_conditional_branch_insn)&MacroAssembler::double_bgt, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_blt, ++ (float_conditional_branch_insn)&MacroAssembler::double_bne, ++ (float_conditional_branch_insn)&MacroAssembler::double_ble, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_bge ++}; ++ ++void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), ++ "invalid conditional branch index"); ++ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); +} + -+void MacroAssembler::load_reserved(Register addr, -+ enum operand_size size, -+ Assembler::Aqrl acquire) { -+ switch (size) { -+ 
case int64: -+ lr_d(t0, addr, acquire); -+ break; -+ case int32: -+ lr_w(t0, addr, acquire); ++// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use ++// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). ++void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), ++ "invalid float conditional branch index"); ++ int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); ++ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, ++ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); ++} ++ ++void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ case BoolTest::le: ++ beqz(op1, L, is_far); + break; -+ case uint32: -+ lr_w(t0, addr, acquire); -+ zero_extend(t0, t0, 32); ++ case BoolTest::ne: ++ case BoolTest::gt: ++ bnez(op1, L, is_far); + break; + default: + ShouldNotReachHere(); + } +} + -+void MacroAssembler::store_conditional(Register addr, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl release) { -+ switch (size) { -+ case int64: -+ sc_d(t0, new_val, addr, release); ++void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ beqz(op1, L, is_far); + break; -+ case int32: -+ case uint32: -+ sc_w(t0, new_val, addr, release); ++ case BoolTest::ne: ++ bnez(op1, L, is_far); + break; + default: + ShouldNotReachHere(); + } +} + ++void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { ++ Label L; ++ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); ++ mv(dst, src); ++ bind(L); ++} + -+void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Register tmp1, Register tmp2, Register tmp3) { -+ assert(size == int8 || size == int16, "unsupported operand size"); -+ -+ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; -+ -+ andi(shift, addr, 3); -+ slli(shift, shift, 3); -+ -+ andi(aligned_addr, addr, ~3); ++// Set dst to NaN if any NaN input. ++void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min) { ++ assert_different_registers(dst, src1, src2); + -+ if (size == int8) { -+ addi(mask, zr, 0xff); ++ Label Done; ++ fsflags(zr); ++ if (is_double) { ++ is_min ? fmin_d(dst, src1, src2) ++ : fmax_d(dst, src1, src2); ++ // Checking NaNs ++ flt_d(zr, src1, src2); + } else { -+ // size == int16 case -+ addi(mask, zr, -1); -+ zero_extend(mask, mask, 16); ++ is_min ? fmin_s(dst, src1, src2) ++ : fmax_s(dst, src1, src2); ++ // Checking NaNs ++ flt_s(zr, src1, src2); + } -+ sll(mask, mask, shift); + -+ xori(not_mask, mask, -1); ++ frflags(t0); ++ beqz(t0, Done); + -+ sll(expected, expected, shift); -+ andr(expected, expected, mask); ++ // In case of NaNs ++ is_double ? fadd_d(dst, src1, src2) ++ : fadd_s(dst, src1, src2); + -+ sll(new_val, new_val, shift); -+ andr(new_val, new_val, mask); ++ bind(Done); +} + -+// cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps. 
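minmax_FD above handles NaN the way Java Math.min/max requires, rather than plain IEEE fmin/fmax: any NaN input must produce NaN, which is why a NaN detected via the accrued fflags falls back to an fadd of the two sources (NaN propagates through addition). A scalar sketch of that NaN handling, with signed-zero ordering left to fmin (java_min is illustrative only):

#include <cmath>

// Java-style min: NaN in, NaN out (IEEE fmin would return the non-NaN operand).
static double java_min(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) {
    return a + b;              // same trick as the fadd fallback: NaN propagates
  }
  return std::fmin(a, b);      // no NaN involved, ordinary minimum
}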
-+// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w, -+// which are forced to work with 4-byte aligned address. -+void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, bool result_as_bool, -+ Register tmp1, Register tmp2, Register tmp3) { -+ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; -+ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); -+ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); ++#endif // COMPILER2 + -+ Label retry, fail, done; +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +new file mode 100644 +index 0000000000..c660bce437 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -0,0 +1,966 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ bind(retry); -+ lr_w(old, aligned_addr, acquire); -+ andr(tmp, old, mask); -+ bne(tmp, expected, fail); ++#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP ++#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP + -+ andr(tmp, old, not_mask); -+ orr(tmp, tmp, new_val); -+ sc_w(tmp, tmp, aligned_addr, release); -+ bnez(tmp, retry); ++#include "asm/assembler.hpp" ++#include "metaprogramming/enableIf.hpp" + -+ if (result_as_bool) { -+ addi(result, zr, 1); -+ j(done); ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. + -+ bind(fail); -+ mv(result, zr); ++class MacroAssembler: public Assembler { + -+ bind(done); -+ } else { -+ andr(tmp, old, mask); ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) { ++ } ++ virtual ~MacroAssembler() {} + -+ bind(fail); -+ srl(result, tmp, shift); ++ void safepoint_poll(Label& slow_path); ++ void safepoint_poll_acquire(Label& slow_path); + -+ if (size == int8) { -+ sign_extend(result, result, 8); -+ } else { -+ // size == int16 case -+ sign_extend(result, result, 16); -+ } ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // swap_reg is killed. 
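As the comments above note, RISC-V only provides word/doubleword load-reserved/store-conditional, so a byte or halfword CAS has to be synthesized on the 4-byte-aligned word that contains the operand: compute a shift and mask for the sub-word lane, then run an lr.w/sc.w retry loop that splices the new value into that lane. A minimal, portable C++ sketch of the same shift/mask scheme (the helper name and its use of std::atomic are illustrative only, not part of the port):

#include <atomic>
#include <cstdint>

// CAS one byte (byte_index in [0, 3]) inside a 32-bit word by doing the
// compare-and-swap on the whole aligned word, as cmpxchg_narrow_value does.
bool cas_byte_in_word(std::atomic<uint32_t>& word, unsigned byte_index,
                      uint8_t expected, uint8_t new_val) {
  const unsigned shift = byte_index * 8;           // like slli(shift, shift, 3)
  const uint32_t mask  = 0xffu << shift;           // the 'mask' register
  const uint32_t exp   = uint32_t(expected) << shift;
  const uint32_t neu   = uint32_t(new_val)  << shift;

  uint32_t old = word.load(std::memory_order_relaxed);
  for (;;) {                                       // the lr.w ... sc.w retry loop
    if ((old & mask) != exp) {
      return false;                                // observed byte differs: fail
    }
    const uint32_t desired = (old & ~mask) | neu;  // keep the other bytes, splice in the new one
    if (word.compare_exchange_weak(old, desired)) {
      return true;                                 // store-conditional succeeded
    }
  }
}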
++ // tmp_reg must be supplied and must not be rscratch1 or rscratch2 ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. ++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL, ++ Register flag = noreg); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done, Register flag = noreg); ++ ++ // Helper functions for statistics gathering. ++ // Unconditional atomic increment. ++ void atomic_incw(Register counter_addr, Register tmp); ++ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { ++ la(tmp1, counter_addr); ++ atomic_incw(tmp1, tmp2); + } -+} + -+// weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement -+// the weak CAS stuff. The major difference is that it just failed when store conditional -+// failed. -+void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, -+ Register tmp1, Register tmp2, Register tmp3) { -+ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; -+ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); -+ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); ++ // Alignment ++ void align(int modulus, int extra_offset = 0); + -+ Label succ, fail, done; ++ // Stack frame creation/removal ++ // Note that SP must be updated to the right place before saving/restoring RA and FP ++ // because signal based thread suspend/resume could happen asynchronously. ++ void enter() { ++ addi(sp, sp, - 2 * wordSize); ++ sd(ra, Address(sp, wordSize)); ++ sd(fp, Address(sp)); ++ addi(fp, sp, 2 * wordSize); ++ } + -+ lr_w(old, aligned_addr, acquire); -+ andr(tmp, old, mask); -+ bne(tmp, expected, fail); ++ void leave() { ++ addi(sp, fp, - 2 * wordSize); ++ ld(fp, Address(sp)); ++ ld(ra, Address(sp, wordSize)); ++ addi(sp, sp, 2 * wordSize); ++ } + -+ andr(tmp, old, not_mask); -+ orr(tmp, tmp, new_val); -+ sc_w(tmp, tmp, aligned_addr, release); -+ beqz(tmp, succ); + -+ bind(fail); -+ addi(result, zr, 1); -+ j(done); ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); + -+ bind(succ); -+ mv(result, zr); ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
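For orientation, enter() above builds the two-slot frame that leave() later unwinds: it saves ra and the caller's fp just below the incoming sp and then points fp back at the incoming sp, so leave() can restore everything from fp even if sp has moved in the meantime. Roughly, assuming 8-byte words:

// Stack after enter() (stack grows downward; a sketch with wordSize == 8):
//
//   incoming sp ->  +----------------+  <- new fp (fp == incoming sp)
//                   |  saved ra      |     new sp + 8
//                   |  saved fp      |     new sp + 0
//   new sp      ->  +----------------+
//
// leave() recomputes sp as fp - 2*wordSize, reloads fp and ra, then pops both slots.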
+ -+ bind(done); -+} ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); + -+void MacroAssembler::cmpxchg(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, bool result_as_bool) { -+ assert(size != int8 && size != int16, "unsupported operand size"); ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); + -+ Label retry_load, done, ne_done; -+ bind(retry_load); -+ load_reserved(addr, size, acquire); -+ bne(t0, expected, ne_done); -+ store_conditional(addr, new_val, size, release); -+ bnez(t0, retry_load); ++ void get_vm_result(Register oop_result, Register java_thread); ++ void get_vm_result_2(Register metadata_result, Register java_thread); + -+ // equal, succeed -+ if (result_as_bool) { -+ li(result, 1); -+ } else { -+ mv(result, expected); -+ } -+ j(done); ++ // These always tightly bind to MacroAssembler::call_VM_leaf_base ++ // bypassing the virtual implementation ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_0); ++ void call_VM_leaf(address entry_point, ++ Register arg_0, Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_0, Register arg_1, Register arg_2); + -+ // not equal, failed -+ bind(ne_done); -+ if (result_as_bool) { -+ mv(result, zr); -+ } else { -+ mv(result, t0); -+ } ++ // These always tightly bind to MacroAssembler::call_VM_base ++ // bypassing the virtual implementation ++ void super_call_VM_leaf(address entry_point, Register arg_0); ++ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3); + -+ bind(done); -+} ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp); ++ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp); ++ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp); + -+void MacroAssembler::cmpxchg_weak(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result) { -+ Label fail, done, sc_done; -+ 
load_reserved(addr, size, acquire); -+ bne(t0, expected, fail); -+ store_conditional(addr, new_val, size, release); -+ beqz(t0, sc_done); ++ // thread in the default location (xthread) ++ void reset_last_Java_frame(bool clear_fp); + -+ // fail -+ bind(fail); -+ li(result, 1); -+ j(done); ++ virtual void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments to pop after the call ++ Label* retaddr = NULL ++ ); + -+ // sc_done -+ bind(sc_done); -+ mv(result, 0); -+ bind(done); -+} ++ virtual void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments to pop after the call ++ Label& retaddr) { ++ call_VM_leaf_base(entry_point, number_of_arguments, &retaddr); ++ } + -+#define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ -+void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ -+ prev = prev->is_valid() ? prev : zr; \ -+ if (incr.is_register()) { \ -+ AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ -+ } else { \ -+ mv(t0, incr.as_constant()); \ -+ AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ -+ } \ -+ return; \ -+} ++ virtual void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); + -+ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) -+ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions); + -+#undef ATOMIC_OP ++ virtual void check_and_handle_earlyret(Register java_thread); ++ virtual void check_and_handle_popframe(Register java_thread); + -+#define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ -+void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ -+ prev = prev->is_valid() ? 
prev : zr; \ -+ AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ -+ return; \ -+} ++ void resolve_oop_handle(Register result, Register tmp = x15); ++ void resolve_jobject(Register value, Register thread, Register tmp); + -+ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) -+ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) ++ void movoop(Register dst, jobject obj, bool immediate = false); ++ void mov_metadata(Register dst, Metadata* obj); ++ void bang_stack_size(Register size, Register tmp); ++ void set_narrow_oop(Register dst, jobject obj); ++ void set_narrow_klass(Register dst, Klass* k); + -+#undef ATOMIC_XCHG ++ void load_mirror(Register dst, Register method, Register tmp = x15); ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, ++ Address src, Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, ++ Register src, Register tmp1, Register thread_tmp); ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L); + -+#define ATOMIC_XCHGU(OP1, OP2) \ -+void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ -+ atomic_##OP2(prev, newv, addr); \ -+ zero_extend(prev, prev, 32); \ -+ return; \ -+} ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); ++ void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); ++ void decode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop(Register d, Register s); ++ void decode_heap_oop(Register r) { decode_heap_oop(r, r); } ++ void encode_heap_oop(Register d, Register s); ++ void encode_heap_oop(Register r) { encode_heap_oop(r, r); }; ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); + -+ATOMIC_XCHGU(xchgwu, xchgw) -+ATOMIC_XCHGU(xchgalwu, xchgalw) ++ void store_klass_gap(Register dst, Register src); + -+#undef ATOMIC_XCHGU ++ // currently unimplemented ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. ++ void store_heap_oop_null(Address dst); + -+void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { -+ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); -+ assert(CodeCache::find_blob(entry.target()) != NULL, -+ "destination of far call not found in code cache"); -+ int32_t offset = 0; -+ if (far_branches()) { -+ // We can use auipc + jalr here because we know that the total size of -+ // the code cache cannot exceed 2Gb. 
-+ la_patchable(tmp, entry, offset); -+ if (cbuf != NULL) { cbuf->set_insts_mark(); } -+ jalr(x0, tmp, offset); -+ } else { -+ if (cbuf != NULL) { cbuf->set_insts_mark(); } -+ j(entry); -+ } -+} ++ void load_prototype_header(Register dst, Register src); + -+void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { -+ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); -+ assert(CodeCache::find_blob(entry.target()) != NULL, -+ "destination of far call not found in code cache"); -+ int32_t offset = 0; -+ if (far_branches()) { -+ // We can use auipc + jalr here because we know that the total size of -+ // the code cache cannot exceed 2Gb. -+ la_patchable(tmp, entry, offset); -+ if (cbuf != NULL) { cbuf->set_insts_mark(); } -+ jalr(x1, tmp, offset); // link -+ } else { -+ if (cbuf != NULL) { cbuf->set_insts_mark(); } -+ jal(entry); // link -+ } -+} ++ // This dummy is to prevent a call to store_heap_oop from ++ // converting a zero (linke NULL) into a Register by giving ++ // the compiler two choices it can't resolve + -+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label* L_success, -+ Label* L_failure, -+ Label* L_slow_path, -+ Register super_check_offset) { -+ assert_different_registers(sub_klass, super_klass, tmp_reg); -+ bool must_load_sco = (super_check_offset == noreg); -+ if (must_load_sco) { -+ assert(tmp_reg != noreg, "supply either a temp or a register offset"); -+ } else { -+ assert_different_registers(sub_klass, super_klass, super_check_offset); -+ } ++ void store_heap_oop(Address dst, void* dummy); + -+ Label L_fallthrough; -+ int label_nulls = 0; -+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } -+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } -+ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } -+ assert(label_nulls <= 1, "at most one NULL in batch"); ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generateion is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). + -+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); -+ int sco_offset = in_bytes(Klass::super_check_offset_offset()); -+ Address super_check_offset_addr(super_klass, sco_offset); ++ virtual void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ static bool uses_implicit_null_check(void* address); + -+ // Hacked jmp, which may only be used just before L_fallthrough. -+#define final_jmp(label) \ -+ if (&(label) == &L_fallthrough) { /*do nothing*/ } \ -+ else j(label) /*omit semi*/ ++ // idiv variant which deals with MINLONG as dividend and -1 as divisor ++ int corrected_idivl(Register result, Register rs1, Register rs2, ++ bool want_remainder); ++ int corrected_idivq(Register result, Register rs1, Register rs2, ++ bool want_remainder); + -+ // If the pointers are equal, we are done (e.g., String[] elements). -+ // This self-check enables sharing of secondary supertype arrays among -+ // non-primary types such as array-of-interface. Otherwise, each such -+ // type would need its own customized SSA. 
-+ // We move this check to the front fo the fast path because many -+ // type checks are in fact trivially successful in this manner, -+ // so we get a nicely predicted branch right at the start of the check. -+ beq(sub_klass, super_klass, *L_success); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_tmp, ++ Label& no_such_interface, ++ bool return_method = true); + -+ // Check the supertype display: -+ if (must_load_sco) { -+ lwu(tmp_reg, super_check_offset_addr); -+ super_check_offset = tmp_reg; -+ } -+ add(t0, sub_klass, super_check_offset); -+ Address super_check_addr(t0); -+ ld(t0, super_check_addr); // load displayed supertype ++ // virtual method calling ++ // n.n. x86 allows RegisterOrConstant for vtable_index ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); + -+ // Ths check has worked decisively for primary supers. -+ // Secondary supers are sought in the super_cache ('super_cache_addr'). -+ // (Secondary supers are interfaces and very deeply nested subtypes.) -+ // This works in the same check above because of a tricky aliasing -+ // between the super_Cache and the primary super dispaly elements. -+ // (The 'super_check_addr' can address either, as the case requires.) -+ // Note that the cache is updated below if it does not help us find -+ // what we need immediately. -+ // So if it was a primary super, we can just fail immediately. -+ // Otherwise, it's the slow path for us (no success at this point). ++ // Form an addres from base + offset in Rd. Rd my or may not ++ // actually be used: you must use the Address that is returned. It ++ // is up to you to ensure that the shift provided mathces the size ++ // of your data. ++ Address form_address(Register Rd, Register base, long byte_offset); + -+ beq(super_klass, t0, *L_success); -+ mv(t1, sc_offset); -+ if (L_failure == &L_fallthrough) { -+ beq(super_check_offset, t1, *L_slow_path); -+ } else { -+ bne(super_check_offset, t1, *L_failure, /* is_far */ true); -+ final_jmp(*L_slow_path); -+ } ++ // allocation ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp1, // temp register ++ Register tmp2, // temp register ++ Label& slow_case, // continuation point of fast allocation fails ++ bool is_far = false ++ ); + -+ bind(L_fallthrough); ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp, // temp register ++ Label& slow_case, // continuation point if fast allocation fails ++ bool is_far = false ++ ); + -+#undef final_jmp -+} ++ // Test sub_klass against super_klass, with fast and slow paths. 
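Both halves of this check follow HotSpot's standard two-level subtype test: the fast path probes a single slot at super->super_check_offset (either the primary-supers display or the secondary-super cache), and only when that probe is inconclusive does the slow path scan the secondary supers linearly, caching any hit. A compact sketch of the equivalent logic, using simplified stand-in types rather than the real Klass layout:

// Stand-in type; field names and layout are illustrative, not the real HotSpot Klass.
struct SimpleKlass {
  int           super_check_offset;       // which slot the fast path probes
  SimpleKlass*  secondary_super_cache;    // last secondary super that matched
  SimpleKlass** secondary_supers;         // array of secondary supers
  int           secondary_supers_len;
  SimpleKlass*  primary_display[8];       // primary supers indexed by depth
};

bool is_subtype_of(SimpleKlass* sub, SimpleKlass* super,
                   int sc_offset /* offset of secondary_super_cache */) {
  if (sub == super) {
    return true;                                      // the leading self-check
  }
  SimpleKlass* probe = *(SimpleKlass**)((char*)sub + super->super_check_offset);
  if (probe == super) {
    return true;                                      // primary display (or cache) hit
  }
  if (super->super_check_offset != sc_offset) {
    return false;                                     // a primary-display miss is decisive
  }
  // Slow path: scan the secondary supers and cache a hit for next time.
  for (int i = 0; i < sub->secondary_supers_len; i++) {
    if (sub->secondary_supers[i] == super) {
      sub->secondary_super_cache = super;
      return true;
    }
  }
  return false;
}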
+ -+// Scans count pointer sized words at [addr] for occurence of value, -+// generic -+void MacroAssembler::repne_scan(Register addr, Register value, Register count, -+ Register tmp) { -+ Label Lloop, Lexit; -+ beqz(count, Lexit); -+ bind(Lloop); -+ ld(tmp, addr); -+ beq(value, tmp, Lexit); -+ add(addr, addr, wordSize); -+ sub(count, count, 1); -+ bnez(count, Lloop); -+ bind(Lexit); -+} ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except tmp_reg ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ Register super_check_offset = noreg); + -+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, -+ Register super_klass, -+ Register tmp1_reg, -+ Register tmp2_reg, -+ Label* L_success, -+ Label* L_failure) { -+ assert_different_registers(sub_klass, super_klass, tmp1_reg); -+ if (tmp2_reg != noreg) { -+ assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); -+ } -+#define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) ++ // The reset of the type cehck; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The tmp1_reg and tmp2_reg can be noreg, if no temps are avaliable. ++ // Updates the sub's secondary super cache as necessary. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register tmp1_reg, ++ Register tmp2_reg, ++ Label* L_success, ++ Label* L_failure); + -+ Label L_fallthrough; -+ int label_nulls = 0; -+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } -+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label& L_success); + -+ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + -+ // A couple of usefule fields in sub_klass: -+ int ss_offset = in_bytes(Klass::secondary_supers_offset()); -+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); -+ Address secondary_supers_addr(sub_klass, ss_offset); -+ Address super_cache_addr( sub_klass, sc_offset); ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char* s = "broken oop addr"); + -+ BLOCK_COMMENT("check_klass_subtype_slow_path"); ++ void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {} ++ void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {} + -+ // Do a linear scan of the secondary super-klass chain. -+ // This code is rarely used, so simplicity is a virtue here. -+ // The repne_scan instruction uses fixed registers, which we must spill. -+ // Don't worry too much about pre-existing connecitons with the input regs. 
++#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++#define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + -+ assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) -+ assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) ++ // A more convenient access to fence for our purposes ++ // We used four bit to indicate the read and write bits in the predecessors and successors, ++ // and extended i for r, o for w if UseConservativeFence enabled. ++ enum Membar_mask_bits { ++ StoreStore = 0b0101, // (pred = ow + succ = ow) ++ LoadStore = 0b1001, // (pred = ir + succ = ow) ++ StoreLoad = 0b0110, // (pred = ow + succ = ir) ++ LoadLoad = 0b1010, // (pred = ir + succ = ir) ++ AnyAny = LoadStore | StoreLoad // (pred = iorw + succ = iorw) ++ }; + -+ RegSet pushed_registers; -+ if (!IS_A_TEMP(x12)) { -+ pushed_registers += x12; -+ } -+ if (!IS_A_TEMP(x15)) { -+ pushed_registers += x15; -+ } ++ void membar(uint32_t order_constraint); + -+ if (super_klass != x10 || UseCompressedOops) { -+ if (!IS_A_TEMP(x10)) { -+ pushed_registers += x10; ++ static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) { ++ predecessor = (order_constraint >> 2) & 0x3; ++ successor = order_constraint & 0x3; ++ ++ // extend rw -> iorw: ++ // 01(w) -> 0101(ow) ++ // 10(r) -> 1010(ir) ++ // 11(rw)-> 1111(iorw) ++ if (UseConservativeFence) { ++ predecessor |= predecessor << 2; ++ successor |= successor << 2; + } + } + -+ push_reg(pushed_registers, sp); ++ static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) { ++ return ((predecessor & 0x3) << 2) | (successor & 0x3); ++ } + -+ // Get super_klass value into x10 (even if it was in x15 or x12) -+ mv(x10, super_klass); ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); + -+#ifndef PRODUCT -+ mv(t1, (address)&SharedRuntime::_partial_subtype_ctr); -+ Address pst_counter_addr(t1); -+ ld(t0, pst_counter_addr); -+ add(t0, t0, 1); -+ sd(t0, pst_counter_addr); -+#endif // PRODUCT ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); + -+ // We will consult the secondary-super array. -+ ld(x15, secondary_supers_addr); -+ // Load the array length. -+ lwu(x12, Address(x15, Array::length_offset_in_bytes())); -+ // Skip to start of data. -+ add(x15, x15, Array::base_offset_in_bytes()); ++ void unimplemented(const char* what = ""); + -+ // Set t0 to an obvious invalid value, falling through by default -+ li(t0, -1); -+ // Scan X12 words at [X15] for an occurrence of X10. -+ repne_scan(x15, x10, x12, t0); ++ void should_not_reach_here() { stop("should not reach here"); } + -+ // pop will restore x10, so we should use a temp register to keep its value -+ mv(t1, x10); ++ static address target_addr_for_insn(address insn_addr); + -+ // Unspill the temp registers: -+ pop_reg(pushed_registers, sp); ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. 
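The Membar_mask_bits values above pack the fence's predecessor set into bits 3:2 and its successor set into bits 1:0, with r encoded as 0b10 and w as 0b01; membar_mask_to_pred_succ simply unpacks them and, when UseConservativeFence is set, widens w to ow and r to ir. A standalone sketch of the same packing, with StoreLoad worked as an example:

#include <cstdint>
#include <cstdio>

enum { W_BIT = 0b01, R_BIT = 0b10 };   // same bit meanings as the enum above

void mask_to_pred_succ(uint32_t order, uint32_t& pred, uint32_t& succ, bool conservative) {
  pred = (order >> 2) & 0x3;           // predecessor set
  succ = order & 0x3;                  // successor set
  if (conservative) {                  // widen: w -> ow, r -> ir, rw -> iorw
    pred |= pred << 2;
    succ |= succ << 2;
  }
}

int main() {
  const uint32_t StoreLoad = (W_BIT << 2) | R_BIT;        // 0b0110, as in the enum above
  uint32_t pred = 0, succ = 0;
  mask_to_pred_succ(StoreLoad, pred, succ, /*conservative=*/false);
  std::printf("fence pred=%#x succ=%#x\n", pred, succ);   // pred = w (0b01), succ = r (0b10)
  return 0;
}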
++ static int pd_patch_instruction_size(address branch, address target); ++ static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) { ++ pd_patch_instruction_size(branch, target); ++ } ++ static address pd_call_destination(address branch) { ++ return target_addr_for_insn(branch); ++ } + -+ bne(t1, t0, *L_failure); ++ static int patch_oop(address insn_addr, address o); ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ void emit_static_call_stub(); + -+ // Success. Cache the super we found an proceed in triumph. -+ sd(super_klass, super_cache_addr); ++ // The following 4 methods return the offset of the appropriate move instruction + -+ if (L_success != &L_fallthrough) { -+ j(*L_success); -+ } ++ // Support for fast byte/short loading with zero extension (depending on particular CPU) ++ int load_unsigned_byte(Register dst, Address src); ++ int load_unsigned_short(Register dst, Address src); + -+#undef IS_A_TEMP ++ // Support for fast byte/short loading with sign extension (depending on particular CPU) ++ int load_signed_byte(Register dst, Address src); ++ int load_signed_short(Register dst, Address src); + -+ bind(L_fallthrough); -+} ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + -+// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. -+void MacroAssembler::tlab_allocate(Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1, -+ Register tmp2, -+ Label& slow_case, -+ bool is_far) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); -+} ++ public: ++ // Standard pseudoinstruction ++ void nop(); ++ void mv(Register Rd, Register Rs); ++ void notr(Register Rd, Register Rs); ++ void neg(Register Rd, Register Rs); ++ void negw(Register Rd, Register Rs); ++ void sext_w(Register Rd, Register Rs); ++ void zext_b(Register Rd, Register Rs); ++ void seqz(Register Rd, Register Rs); // set if = zero ++ void snez(Register Rd, Register Rs); // set if != zero ++ void sltz(Register Rd, Register Rs); // set if < zero ++ void sgtz(Register Rd, Register Rs); // set if > zero + -+// Defines obj, preserves var_size_in_bytes -+void MacroAssembler::eden_allocate(Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp, -+ Label& slow_case, -+ bool is_far) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp, slow_case, is_far); -+} ++ // Float pseudoinstruction ++ void fmv_s(FloatRegister Rd, FloatRegister Rs); ++ void fabs_s(FloatRegister Rd, FloatRegister Rs); // single-precision absolute value ++ void fneg_s(FloatRegister Rd, FloatRegister Rs); + ++ // Double pseudoinstruction ++ void fmv_d(FloatRegister Rd, FloatRegister Rs); ++ void fabs_d(FloatRegister Rd, FloatRegister Rs); ++ void fneg_d(FloatRegister Rd, FloatRegister Rs); + -+// get_thread() can be called anywhere inside generated code so we -+// need to save whatever non-callee save context might get clobbered -+// by the call to Thread::current() or, indeed, the call setup code. 
-+void MacroAssembler::get_thread(Register thread) { -+ // save all call-clobbered regs except thread -+ RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + -+ RegSet::range(x28, x31) + ra - thread; -+ push_reg(saved_regs, sp); ++ // Pseudoinstruction for control and status register ++ void rdinstret(Register Rd); // read instruction-retired counter ++ void rdcycle(Register Rd); // read cycle counter ++ void rdtime(Register Rd); // read time ++ void csrr(Register Rd, unsigned csr); // read csr ++ void csrw(unsigned csr, Register Rs); // write csr ++ void csrs(unsigned csr, Register Rs); // set bits in csr ++ void csrc(unsigned csr, Register Rs); // clear bits in csr ++ void csrwi(unsigned csr, unsigned imm); ++ void csrsi(unsigned csr, unsigned imm); ++ void csrci(unsigned csr, unsigned imm); ++ void frcsr(Register Rd); // read float-point csr ++ void fscsr(Register Rd, Register Rs); // swap float-point csr ++ void fscsr(Register Rs); // write float-point csr ++ void frrm(Register Rd); // read float-point rounding mode ++ void fsrm(Register Rd, Register Rs); // swap float-point rounding mode ++ void fsrm(Register Rs); // write float-point rounding mode ++ void fsrmi(Register Rd, unsigned imm); ++ void fsrmi(unsigned imm); ++ void frflags(Register Rd); // read float-point exception flags ++ void fsflags(Register Rd, Register Rs); // swap float-point exception flags ++ void fsflags(Register Rs); // write float-point exception flags ++ void fsflagsi(Register Rd, unsigned imm); ++ void fsflagsi(unsigned imm); + -+ int32_t offset = 0; -+ movptr_with_offset(ra, CAST_FROM_FN_PTR(address, Thread::current), offset); -+ jalr(ra, ra, offset); -+ if (thread != x10) { -+ mv(thread, x10); -+ } ++ void beqz(Register Rs, const address &dest); ++ void bnez(Register Rs, const address &dest); ++ void blez(Register Rs, const address &dest); ++ void bgez(Register Rs, const address &dest); ++ void bltz(Register Rs, const address &dest); ++ void bgtz(Register Rs, const address &dest); ++ void la(Register Rd, Label &label); ++ void la(Register Rd, const address &dest); ++ void la(Register Rd, const Address &adr); ++ //label ++ void beqz(Register Rs, Label &l, bool is_far = false); ++ void bnez(Register Rs, Label &l, bool is_far = false); ++ void blez(Register Rs, Label &l, bool is_far = false); ++ void bgez(Register Rs, Label &l, bool is_far = false); ++ void bltz(Register Rs, Label &l, bool is_far = false); ++ void bgtz(Register Rs, Label &l, bool is_far = false); ++ void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void 
double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + -+ // restore pushed registers -+ pop_reg(saved_regs, sp); -+} ++ void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } } ++ void pop_reg(RegSet regs, Register stack) { if (regs.bits()) { pop_reg(regs.bits(), stack); } } ++ void push_reg(Register Rs); ++ void pop_reg(Register Rd); ++ int push_reg(unsigned int bitset, Register stack); ++ int pop_reg(unsigned int bitset, Register stack); + -+void MacroAssembler::load_byte_map_base(Register reg) { -+ CardTable::CardValue* byte_map_base = -+ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); -+ li(reg, (uint64_t)byte_map_base); -+} ++ // Push and pop everything that might be clobbered by a native ++ // runtime call except t0 and t1. (They are always ++ // temporary registers, so we don't have to protect them.) ++ // Additional registers can be excluded in a passed RegSet. ++ void push_call_clobbered_registers_except(RegSet exclude); ++ void pop_call_clobbered_registers_except(RegSet exclude); + -+void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) { -+ relocInfo::relocType rtype = dest.rspec().reloc()->type(); -+ unsigned long low_address = (uintptr_t)CodeCache::low_bound(); -+ unsigned long high_address = (uintptr_t)CodeCache::high_bound(); -+ unsigned long dest_address = (uintptr_t)dest.target(); -+ long offset_low = dest_address - low_address; -+ long offset_high = dest_address - high_address; ++ void push_call_clobbered_registers() { ++ push_call_clobbered_registers_except(RegSet()); ++ } ++ void pop_call_clobbered_registers() { ++ pop_call_clobbered_registers_except(RegSet()); ++ } + -+ assert(is_valid_riscv64_address(dest.target()), "bad address"); -+ assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); ++ void pusha(); ++ void popa(); ++ void push_CPU_state(); ++ void pop_CPU_state(); + -+ InstructionMark im(this); -+ code_section()->relocate(inst_mark(), dest.rspec()); -+ // RISC-V doesn't compute a page-aligned address, in order to partially -+ // compensate for the use of *signed* offsets in its base+disp12 -+ // addressing mode (RISC-V's PC-relative reach remains asymmetric -+ // [-(2G + 2K), 2G - 2k). 
-+ if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { -+ int64_t distance = dest.target() - pc(); -+ auipc(reg1, (int32_t)distance + 0x800); -+ offset = ((int32_t)distance << 20) >> 20; -+ } else { -+ movptr_with_offset(reg1, dest.target(), offset); ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ void bind(Label& L) { ++ Assembler::bind(L); ++ // fences across basic blocks should not be merged ++ code()->clear_last_insn(); + } -+} + -+void MacroAssembler::build_frame(int framesize) { -+ assert(framesize >= 2, "framesize must include space for FP/RA"); -+ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); -+ sub(sp, sp, framesize); -+ sd(fp, Address(sp, framesize - 2 * wordSize)); -+ sd(ra, Address(sp, framesize - wordSize)); -+ if (PreserveFramePointer) { add(fp, sp, framesize); } -+ verify_cross_modify_fence_not_required(); -+} ++ // mv ++ inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned int imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned long long imm64) { li(Rd, (int64_t)imm64); } + -+void MacroAssembler::remove_frame(int framesize) { -+ assert(framesize >= 2, "framesize must include space for FP/RA"); -+ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); -+ ld(fp, Address(sp, framesize - 2 * wordSize)); -+ ld(ra, Address(sp, framesize - wordSize)); -+ add(sp, sp, framesize); -+} ++ inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } + -+void MacroAssembler::reserved_stack_check() { -+ // testing if reserved zone needs to be enabled -+ Label no_reserved_zone_enabling; ++ void mv(Register Rd, Address dest); ++ void mv(Register Rd, address dest); ++ void mv(Register Rd, RegisterOrConstant src); + -+ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); -+ bltu(sp, t0, no_reserved_zone_enabling); ++ // logic ++ void andrw(Register Rd, Register Rs1, Register Rs2); ++ void orrw(Register Rd, Register Rs1, Register Rs2); ++ void xorrw(Register Rd, Register Rs1, Register Rs2); + -+ enter(); // RA and FP are live. 
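The la_patchable sequence above relies on the usual RISC-V hi20/lo12 split: auipc consumes the upper 20 bits of (distance + 0x800), the following 12-bit-immediate instruction supplies the sign-extended low 12 bits, and the two always add back up to the original displacement; the 0x800 rounding compensates for the low part being sign-extended. A small self-contained check of that arithmetic (assuming, as the code above does, that auipc keeps only bits 31:12 of its argument):

#include <cassert>
#include <cstdint>

// Split a displacement the way la_patchable does and verify it recomposes exactly.
void split_offset(int64_t distance, int64_t& hi20, int32_t& lo12) {
  lo12 = (int32_t)(((distance & 0xfff) ^ 0x800) - 0x800);  // sign-extended low 12 bits
  hi20 = (distance + 0x800) & ~int64_t(0xfff);             // what auipc actually adds to pc
  assert(hi20 + lo12 == distance);
}

int main() {
  int64_t hi = 0;
  int32_t lo = 0;
  split_offset(0x12345fff, hi, lo);   // lo becomes -1, hi becomes 0x12346000
  split_offset(-0x1000, hi, lo);      // lo becomes 0,  hi becomes -0x1000
  return 0;
}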
-+ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset); -+ jalr(x1, t0, offset); -+ leave(); ++ // revb ++ void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend ++ void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend ++ void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend ++ void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend ++ void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower ++ void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword ++ void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word ++ void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword + -+ // We have already removed our own frame. -+ // throw_delayed_StackOverflowError will think that it's been -+ // called by our caller. -+ offset = 0; -+ la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset); -+ jalr(x0, t0, offset); -+ should_not_reach_here(); ++ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); ++ void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0); ++ void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); + -+ bind(no_reserved_zone_enabling); -+} ++ void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail); ++ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail); ++ void cmpxchg(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool = false); ++ void cmpxchg_weak(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result); ++ void cmpxchg_narrow_value_helper(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Register tmp1, Register tmp2, Register tmp3); ++ void cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool, ++ Register tmp1, Register tmp2, Register tmp3); ++ void weak_cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, ++ Register tmp1, Register tmp2, Register tmp3); + -+// Move the address of the polling page into dest. 
-+void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { -+ ld(dest, Address(xthread, JavaThread::polling_page_offset())); -+} ++ void atomic_add(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addw(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addal(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr); + -+// Read the polling page. The address of the polling page must -+// already be in r. -+address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { -+ address mark; -+ { -+ InstructionMark im(this); -+ code_section()->relocate(inst_mark(), rtype); -+ lwu(zr, Address(r, offset)); -+ mark = inst_mark(); -+ } -+ verify_cross_modify_fence_not_required(); -+ return mark; -+} ++ void atomic_xchg(Register prev, Register newv, Register addr); ++ void atomic_xchgw(Register prev, Register newv, Register addr); ++ void atomic_xchgal(Register prev, Register newv, Register addr); ++ void atomic_xchgalw(Register prev, Register newv, Register addr); ++ void atomic_xchgwu(Register prev, Register newv, Register addr); ++ void atomic_xchgalwu(Register prev, Register newv, Register addr); + -+void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { -+#ifdef ASSERT -+ { -+ ThreadInVMfromUnknown tiv; -+ assert (UseCompressedOops, "should only be used for compressed oops"); -+ assert (Universe::heap() != NULL, "java heap should be initialized"); -+ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); -+ assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); ++ static bool far_branches() { ++ return ReservedCodeCacheSize > branch_range; + } -+#endif -+ int oop_index = oop_recorder()->find_index(obj); -+ InstructionMark im(this); -+ RelocationHolder rspec = oop_Relocation::spec(oop_index); -+ code_section()->relocate(inst_mark(), rspec); -+ li32(dst, 0xDEADBEEF); -+ zero_extend(dst, dst, 32); -+} -+ -+void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { -+ assert (UseCompressedClassPointers, "should only be used for compressed headers"); -+ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); -+ int index = oop_recorder()->find_index(k); -+ assert(!Universe::heap()->is_in(k), "should not be an oop"); -+ -+ InstructionMark im(this); -+ RelocationHolder rspec = metadata_Relocation::spec(index); -+ code_section()->relocate(inst_mark(), rspec); -+ narrowKlass nk = CompressedKlassPointers::encode(k); -+ li32(dst, nk); -+ zero_extend(dst, dst, 32); -+} + -+// Maybe emit a call via a trampoline. If the code cache is small -+// trampolines won't be emitted. -+address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { -+ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); -+ assert(entry.rspec().type() == relocInfo::runtime_call_type || -+ entry.rspec().type() == relocInfo::opt_virtual_call_type || -+ entry.rspec().type() == relocInfo::static_call_type || -+ entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ // Jumps that can reach anywhere in the code cache. ++ // Trashes tmp. ++ void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); ++ void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); + -+ // We need a trampoline if branches are far. 
-+ if (far_branches()) { -+ bool in_scratch_emit_size = false; -+#ifdef COMPILER2 -+ // We don't want to emit a trampoline if C2 is generating dummy -+ // code during its branch shortening phase. -+ CompileTask* task = ciEnv::current()->task(); -+ in_scratch_emit_size = -+ (task != NULL && is_c2_compile(task->comp_level()) && -+ Compile::current()->output()->in_scratch_emit_size()); -+#endif -+ if (!in_scratch_emit_size) { -+ address stub = emit_trampoline_stub(offset(), entry.target()); -+ if (stub == NULL) { -+ postcond(pc() == badAddress); -+ return NULL; // CodeCache is full -+ } ++ static int far_branch_size() { ++ if (far_branches()) { ++ return 2 * 4; // auipc + jalr, see far_call() & far_jump() ++ } else { ++ return 4; + } + } + -+ if (cbuf != NULL) { cbuf->set_insts_mark(); } -+ relocate(entry.rspec()); -+ if (!far_branches()) { -+ jal(entry.target()); -+ } else { -+ jal(pc()); -+ } -+ // just need to return a non-null address -+ postcond(pc() != badAddress); -+ return pc(); -+} ++ void load_byte_map_base(Register reg); + -+address MacroAssembler::ic_call(address entry, jint method_index) { -+ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); -+ movptr(t1, (address)Universe::non_oop_word()); -+ assert_cond(entry != NULL); -+ return trampoline_call(Address(entry, rh)); -+} ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ sub(t0, sp, offset); ++ sd(zr, Address(t0)); ++ } + -+// Emit a trampoline stub for a call to a target which is too far away. -+// -+// code sequences: -+// -+// call-site: -+// branch-and-link to or -+// -+// Related trampoline stub for this call site in the stub section: -+// load the call target from the constant pool -+// branch (RA still points to the call site above) ++ void la_patchable(Register reg1, const Address &dest, int32_t &offset); + -+address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, -+ address dest) { -+ address stub = start_a_stub(NativeInstruction::instruction_size -+ + NativeCallTrampolineStub::instruction_size); -+ if (stub == NULL) { -+ return NULL; // CodeBuffer::expand failed ++ virtual void _call_Unimplemented(address call_site) { ++ mv(t1, call_site); + } + -+ // Create a trampoline stub relocation which relates this trampoline stub -+ // with the call instruction at insts_call_instruction_offset in the -+ // instructions code-section. -+ -+ // make sure 4 byte aligned here, so that the destination address would be -+ // 8 byte aligned after 3 intructions -+ // when we reach here we may get a 2-byte alignment so need to align it -+ align(wordSize, NativeCallTrampolineStub::data_offset); ++ #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) + -+ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + -+ insts_call_instruction_offset)); -+ const int stub_start_offset = offset(); ++ // Frame creation and destruction shared between JITs. 
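When far_branches() is true, the call site branch-and-links to a small trampoline in the stub section that loads the real destination from an inline 64-bit word and jumps to it, so later patching only has to rewrite that data word. Going by the emission code above, the stub has roughly this shape (offsets assume the alignment established just before the data word):

// Trampoline stub emitted by emit_trampoline_stub() (a sketch):
//
//   stub + 0:   auipc t0, %hi(stub + 12)      // 'ld(t0, target)' expands to auipc + ld
//   stub + 4:   ld    t0, %lo(stub + 12)(t0)
//   stub + 8:   jalr  x0, 0(t0)               // 'jr(t0)'; ra still points at the call site
//   stub + 12:  .8byte <destination address>  // the word that gets patched
//
// far_call()/far_jump() avoid the stub entirely: with the code cache under 2 GiB they
// reach any target with auipc + jalr, which is why far_branch_size() is two instructions.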
++ void build_frame(int framesize); ++ void remove_frame(int framesize); + -+ // Now, create the trampoline stub's code: -+ // - load the call -+ // - call -+ Label target; -+ ld(t0, target); // auipc + ld -+ jr(t0); // jalr -+ bind(target); -+ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, -+ "should be"); -+ assert(offset() % wordSize == 0, "bad alignment"); -+ emit_int64((intptr_t)dest); ++ void reserved_stack_check(); + -+ const address stub_start_addr = addr_at(stub_start_offset); ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); + -+ assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); ++ void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); ++ void read_polling_page(Register r, address page, relocInfo::relocType rtype); ++ void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); + -+ end_a_stub(); -+ return stub_start_addr; -+} ++ address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); ++ address ic_call(address entry, jint method_index = 0); + -+Address MacroAssembler::add_memory_helper(const Address dst) { -+ switch (dst.getMode()) { -+ case Address::base_plus_offset: -+ // This is the expected mode, although we allow all the other -+ // forms below. -+ return form_address(t1, dst.base(), dst.offset()); -+ default: -+ la(t1, dst); -+ return Address(t1); -+ } -+} ++ void add_memory_int64(const Address dst, int64_t imm); ++ void add_memory_int32(const Address dst, int32_t imm); + -+void MacroAssembler::add_memory_int64(const Address dst, int64_t imm) { -+ Address adr = add_memory_helper(dst); -+ assert_different_registers(adr.base(), t0); -+ ld(t0, adr); -+ addi(t0, t0, imm); -+ sd(t0, adr); -+} ++ void cmpptr(Register src1, Address src2, Label& equal); + -+void MacroAssembler::add_memory_int32(const Address dst, int32_t imm) { -+ Address adr = add_memory_helper(dst); -+ assert_different_registers(adr.base(), t0); -+ lwu(t0, adr); -+ addiw(t0, t0, imm); -+ sw(t0, adr); -+} ++ void compute_index(Register str1, Register trailing_zeros, Register match_mask, ++ Register result, Register char_tmp, Register tmp, ++ bool haystack_isL); ++ void compute_match_mask(Register src, Register pattern, Register match_mask, ++ Register mask1, Register mask2); + -+void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { -+ assert_different_registers(src1, t0); -+ int32_t offset; -+ la_patchable(t0, src2, offset); -+ ld(t0, Address(t0, offset)); -+ beq(src1, t0, equal); -+} ++#ifdef COMPILER2 ++ void mul_add(Register out, Register in, Register offset, ++ Register len, Register k, Register tmp); ++ void cad(Register dst, Register src1, Register src2, Register carry); ++ void cadc(Register dst, Register src1, Register src2, Register carry); ++ void adc(Register dst, Register src1, Register src2, Register carry); ++ void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, ++ Register src1, Register src2, Register carry); ++ void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx); ++ void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx); ++ void multiply_128_x_128_loop(Register y, Register z, ++ Register 
carry, Register carry2, ++ Register idx, Register jdx, ++ Register yz_idx1, Register yz_idx2, ++ Register tmp, Register tmp3, Register tmp4, ++ Register tmp6, Register product_hi); ++ void multiply_to_len(Register x, Register xlen, Register y, Register ylen, ++ Register z, Register zlen, ++ Register tmp1, Register tmp2, Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, Register product_hi); ++#endif + -+void MacroAssembler::load_method_holder_cld(Register result, Register method) { -+ load_method_holder(result, method); -+ ld(result, Address(result, InstanceKlass::class_loader_data_offset())); -+} ++ void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); ++ void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); + -+void MacroAssembler::load_method_holder(Register holder, Register method) { -+ ld(holder, Address(method, Method::const_offset())); // ConstMethod* -+ ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* -+ ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* -+} ++ void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1); + -+// string indexof -+// compute index by trailing zeros -+void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, -+ Register match_mask, Register result, -+ Register ch2, Register tmp, -+ bool haystack_isL) -+{ -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ srl(match_mask, match_mask, trailing_zeros); -+ srli(match_mask, match_mask, 1); -+ srli(tmp, trailing_zeros, LogBitsPerByte); -+ if (!haystack_isL) andi(tmp, tmp, 0xE); -+ add(haystack, haystack, tmp); -+ ld(ch2, Address(haystack)); -+ if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); -+ add(result, result, tmp); -+} ++ void zero_words(Register base, u_int64_t cnt); ++ address zero_words(Register ptr, Register cnt); ++ void fill_words(Register base, Register cnt, Register value); ++ void zero_memory(Register addr, Register len, Register tmp); + -+// string indexof -+// Find pattern element in src, compute match mask, -+// only the first occurrence of 0x80/0x8000 at low bits is the valid match index -+// match mask patterns and corresponding indices would be like: -+// - 0x8080808080808080 (Latin1) -+// - 7 6 5 4 3 2 1 0 (match index) -+// - 0x8000800080008000 (UTF16) -+// - 3 2 1 0 (match index) -+void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, -+ Register mask1, Register mask2) -+{ -+ xorr(src, pattern, src); -+ sub(match_mask, src, mask1); -+ orr(src, src, mask2); -+ notr(src, src); -+ andr(match_mask, match_mask, src); -+} ++ // shift left by shamt and add ++ void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt); + -+#ifdef COMPILER2 -+// Code for BigInteger::mulAdd instrinsic -+// out = x10 -+// in = x11 -+// offset = x12 (already out.length-offset) -+// len = x13 -+// k = x14 -+// tmp = x28 -+// -+// pseudo code from java implementation: -+// long kLong = k & LONG_MASK; -+// carry = 0; -+// offset = out.length-offset - 1; -+// for (int j = len - 1; j >= 0; j--) { -+// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; -+// out[offset--] = (int)product; -+// carry = product >>> 32; -+// } -+// return (int)carry; -+void MacroAssembler::mul_add(Register out, Register in, Register offset, -+ Register len, Register k, Register tmp) { -+ Label L_tail_loop, L_unroll, L_end; -+ mv(tmp, out); -+ mv(out, 
zr); -+ blez(len, L_end); -+ zero_extend(k, k, 32); -+ slliw(t0, offset, LogBytesPerInt); -+ add(offset, tmp, t0); -+ slliw(t0, len, LogBytesPerInt); -+ add(in, in, t0); ++ // Here the float instructions with safe deal with some exceptions. ++ // e.g. convert from NaN, +Inf, -Inf to int, float, double ++ // will trigger exception, we need to deal with these situations ++ // to get correct results. ++ void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0); ++ void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0); ++ void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0); ++ void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0); + -+ const int unroll = 8; -+ li(tmp, unroll); -+ blt(len, tmp, L_tail_loop); -+ bind(L_unroll); -+ for (int i = 0; i < unroll; i++) { -+ sub(in, in, BytesPerInt); -+ lwu(t0, Address(in, 0)); -+ mul(t1, t0, k); -+ add(t0, t1, out); -+ sub(offset, offset, BytesPerInt); -+ lwu(t1, Address(offset, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(offset, 0)); -+ srli(out, t0, 32); ++ // vector load/store unit-stride instructions ++ void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { ++ switch (sew) { ++ case Assembler::e64: ++ vle64_v(vd, base, vm); ++ break; ++ case Assembler::e32: ++ vle32_v(vd, base, vm); ++ break; ++ case Assembler::e16: ++ vle16_v(vd, base, vm); ++ break; ++ case Assembler::e8: // fall through ++ default: ++ vle8_v(vd, base, vm); ++ break; ++ } + } -+ subw(len, len, tmp); -+ bge(len, tmp, L_unroll); + -+ bind(L_tail_loop); -+ blez(len, L_end); -+ sub(in, in, BytesPerInt); -+ lwu(t0, Address(in, 0)); -+ mul(t1, t0, k); -+ add(t0, t1, out); -+ sub(offset, offset, BytesPerInt); -+ lwu(t1, Address(offset, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(offset, 0)); -+ srli(out, t0, 32); -+ subw(len, len, 1); -+ j(L_tail_loop); ++ void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { ++ switch (sew) { ++ case Assembler::e64: ++ vse64_v(store_data, base, vm); ++ break; ++ case Assembler::e32: ++ vse32_v(store_data, base, vm); ++ break; ++ case Assembler::e16: ++ vse16_v(store_data, base, vm); ++ break; ++ case Assembler::e8: // fall through ++ default: ++ vse8_v(store_data, base, vm); ++ break; ++ } ++ } + -+ bind(L_end); -+} ++ static const int zero_words_block_size; + -+// add two unsigned input and output carry -+void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, carry); -+ assert_different_registers(dst, src2); -+ add(dst, src1, src2); -+ sltu(carry, dst, src2); -+} ++ void cast_primitive_type(BasicType type, Register Rt) { ++ switch (type) { ++ case T_BOOLEAN: ++ sltu(Rt, zr, Rt); ++ break; ++ case T_CHAR : ++ zero_extend(Rt, Rt, 16); ++ break; ++ case T_BYTE : ++ sign_extend(Rt, Rt, 8); ++ break; ++ case T_SHORT : ++ sign_extend(Rt, Rt, 16); ++ break; ++ case T_INT : ++ addw(Rt, Rt, zr); ++ break; ++ case T_LONG : /* nothing to do */ break; ++ case T_VOID : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ } + -+// add two input with carry -+void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, carry); -+ add(dst, src1, src2); -+ add(dst, dst, carry); -+} ++ // float cmp with unordered_result ++ void float_compare(Register result, FloatRegister Rs1, 
FloatRegister Rs2, int unordered_result); ++ void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); + -+// add two unsigned input with carry and output carry -+void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, src2); -+ adc(dst, src1, src2, carry); -+ sltu(carry, dst, src2); -+} ++ // Zero/Sign-extend ++ void zero_extend(Register dst, Register src, int bits); ++ void sign_extend(Register dst, Register src, int bits); + -+void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, -+ Register src1, Register src2, Register carry) -+{ -+ cad(dest_lo, dest_lo, src1, carry); -+ add(dest_hi, dest_hi, carry); -+ cad(dest_lo, dest_lo, src2, carry); -+ add(final_dest_hi, dest_hi, carry); -+} ++ // compare src1 and src2 and get -1/0/1 in dst. ++ // if [src1 > src2], dst = 1; ++ // if [src1 == src2], dst = 0; ++ // if [src1 < src2], dst = -1; ++ void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0); + -+/** -+ * Multiply 32 bit by 32 bit first loop. -+ */ -+void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx) -+{ -+ // jlong carry, x[], y[], z[]; -+ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { -+ // long product = y[idx] * x[xstart] + carry; -+ // z[kdx] = (int)product; -+ // carry = product >>> 32; -+ // } -+ // z[xstart] = (int)carry; ++ int push_fp(unsigned int bitset, Register stack); ++ int pop_fp(unsigned int bitset, Register stack); + -+ Label L_first_loop, L_first_loop_exit; -+ blez(idx, L_first_loop_exit); ++ // vext ++ void vmnot_m(VectorRegister vd, VectorRegister vs); ++ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); ++ void vfneg_v(VectorRegister vd, VectorRegister vs); + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ lwu(x_xstart, Address(t0, 0)); ++private: + -+ bind(L_first_loop); -+ subw(idx, idx, 1); -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ lwu(y_idx, Address(t0, 0)); -+ mul(product, x_xstart, y_idx); -+ add(product, product, carry); -+ srli(carry, product, 32); -+ subw(kdx, kdx, 1); -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(product, Address(t0, 0)); -+ bgtz(idx, L_first_loop); ++#ifdef ASSERT ++ // Macro short-hand support to clean-up after a failed call to trampoline ++ // call generation (see trampoline_call() below), when a set of Labels must ++ // be reset (before returning). ++#define reset_labels1(L1) L1.reset() ++#define reset_labels2(L1, L2) L1.reset(); L2.reset() ++#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) ++#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) ++#endif ++ void repne_scan(Register addr, Register value, Register count, Register tmp); + -+ bind(L_first_loop_exit); -+} ++ // Return true if an address is within the 48-bit RISCV64 address space. ++ bool is_valid_riscv64_address(address addr) { ++ // sv48: must have bits 63–48 all equal to bit 47 ++ return ((uintptr_t)addr >> 47) == 0; ++ } + -+/** -+ * Multiply 64 bit by 64 bit first loop. 
-+ */ -+void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx) -+{ -+ // -+ // jlong carry, x[], y[], z[]; -+ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { -+ // huge_128 product = y[idx] * x[xstart] + carry; -+ // z[kdx] = (jlong)product; -+ // carry = (jlong)(product >>> 64); -+ // } -+ // z[xstart] = carry; -+ // ++ void ld_constant(Register dest, const Address &const_addr) { ++ if (NearCpool) { ++ ld(dest, const_addr); ++ } else { ++ int32_t offset = 0; ++ la_patchable(dest, InternalAddress(const_addr.target()), offset); ++ ld(dest, Address(dest, offset)); ++ } ++ } + -+ Label L_first_loop, L_first_loop_exit; -+ Label L_one_x, L_one_y, L_multiply; ++ int bitset_to_regs(unsigned int bitset, unsigned char* regs); ++ Address add_memory_helper(const Address dst); + -+ subw(xstart, xstart, 1); -+ bltz(xstart, L_one_x); ++ void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); ++ void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ ld(x_xstart, Address(t0, 0)); -+ ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian ++public: ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ Register tmp1, Register tmp2, Register tmp3, ++ int ae); + -+ bind(L_first_loop); -+ subw(idx, idx, 1); -+ bltz(idx, L_first_loop_exit); -+ subw(idx, idx, 1); -+ bltz(idx, L_one_y); ++ void string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL); + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(y_idx, Address(t0, 0)); -+ ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian -+ bind(L_multiply); ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL); + -+ mulhu(t0, x_xstart, y_idx); -+ mul(product, x_xstart, y_idx); -+ cad(product, product, carry, t1); -+ adc(carry, t0, zr, t1); ++ void string_indexof(Register str1, Register str2, ++ Register cnt1, Register cnt2, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae); + -+ subw(kdx, kdx, 2); -+ ror_imm(product, product, 32); // back to big-endian -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sd(product, Address(t0, 0)); ++ void string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae); + -+ j(L_first_loop); ++ void arrays_equals(Register r1, Register r2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, Register cnt1, ++ int elem_size); + -+ bind(L_one_y); -+ lwu(y_idx, Address(y, 0)); -+ j(L_multiply); ++ void string_equals(Register r1, Register r2, ++ Register result, Register cnt1, ++ int elem_size); + -+ bind(L_one_x); -+ lwu(x_xstart, Address(x, 0)); -+ j(L_first_loop); ++ // refer to conditional_branches and float_conditional_branches ++ static const int bool_test_bits = 3; ++ static const int neg_cond_bits = 2; ++ static const int unsigned_branch_mask = 1 << bool_test_bits; ++ static const int double_branch_mask = 1 << bool_test_bits; + -+ 
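The loop body above follows the jlong pseudocode in its comment: one 64-bit limb of x is multiplied against every limb of y while a 64-bit carry rides along, and the ror_imm(..., 32) calls only reorder the two 32-bit digits of each load and store. A minimal reference model in portable C++ (an editor's sketch, not code from the patch; unsigned __int128 stands in for the mul/mulhu pair, and the model assumes ready-made 64-bit limbs):

#include <cstdint>

// Sketch of the "first loop" of BigInteger.multiplyToLen over 64-bit limbs.
static void multiply_first_loop_ref(const uint64_t* x, int xstart,
                                    const uint64_t* y, int ystart,
                                    uint64_t* z) {
  uint64_t carry = 0;
  for (int idx = ystart, kdx = ystart + 1 + xstart; idx >= 0; idx--, kdx--) {
    unsigned __int128 product = (unsigned __int128)y[idx] * x[xstart] + carry;
    z[kdx] = (uint64_t)product;          // low 64 bits, what mul produces
    carry  = (uint64_t)(product >> 64);  // high 64 bits, what mulhu produces
  }
  z[xstart] = carry;
}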
bind(L_first_loop_exit); -+} ++ // cmp ++ void cmp_branch(int cmpFlag, ++ Register op1, Register op2, ++ Label& label, bool is_far = false); + -+/** -+ * Multiply 128 bit by 128 bit. Unrolled inner loop. -+ * -+ */ -+void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, -+ Register carry, Register carry2, -+ Register idx, Register jdx, -+ Register yz_idx1, Register yz_idx2, -+ Register tmp, Register tmp3, Register tmp4, -+ Register tmp6, Register product_hi) -+{ -+ // jlong carry, x[], y[], z[]; -+ // int kdx = xstart+1; -+ // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop -+ // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; -+ // jlong carry2 = (jlong)(tmp3 >>> 64); -+ // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; -+ // carry = (jlong)(tmp4 >>> 64); -+ // z[kdx+idx+1] = (jlong)tmp3; -+ // z[kdx+idx] = (jlong)tmp4; -+ // } -+ // idx += 2; -+ // if (idx > 0) { -+ // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; -+ // z[kdx+idx] = (jlong)yz_idx1; -+ // carry = (jlong)(yz_idx1 >>> 64); -+ // } -+ // ++ void float_cmp_branch(int cmpFlag, ++ FloatRegister op1, FloatRegister op2, ++ Label& label, bool is_far = false); + -+ Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; ++ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); + -+ srliw(jdx, idx, 2); ++ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); + -+ bind(L_third_loop); ++ void enc_cmove(int cmpFlag, ++ Register op1, Register op2, ++ Register dst, Register src); + -+ subw(jdx, jdx, 1); -+ bltz(jdx, L_third_loop_exit); -+ subw(idx, idx, 4); ++ void spill(Register r, bool is64, int offset) { ++ is64 ? sd(r, Address(sp, offset)) ++ : sw(r, Address(sp, offset)); ++ } + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(yz_idx2, Address(t0, 0)); -+ ld(yz_idx1, Address(t0, wordSize)); ++ void spill(FloatRegister f, bool is64, int offset) { ++ is64 ? fsd(f, Address(sp, offset)) ++ : fsw(f, Address(sp, offset)); ++ } + -+ shadd(tmp6, idx, z, t0, LogBytesPerInt); ++ void spill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vs1r_v(v, t0); ++ } + -+ ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian -+ ror_imm(yz_idx2, yz_idx2, 32); ++ void unspill(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lw(r, Address(sp, offset)); ++ } + -+ ld(t1, Address(tmp6, 0)); -+ ld(t0, Address(tmp6, wordSize)); ++ void unspillu(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lwu(r, Address(sp, offset)); ++ } + -+ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 -+ mulhu(tmp4, product_hi, yz_idx1); ++ void unspill(FloatRegister f, bool is64, int offset) { ++ is64 ? 
fld(f, Address(sp, offset)) ++ : flw(f, Address(sp, offset)); ++ } + -+ ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian -+ ror_imm(t1, t1, 32, tmp); ++ void unspill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vl1r_v(v, t0); ++ } + -+ mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp -+ mulhu(carry2, product_hi, yz_idx2); ++ void minmax_FD(FloatRegister dst, ++ FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min); + -+ cad(tmp3, tmp3, carry, carry); -+ adc(tmp4, tmp4, zr, carry); -+ cad(tmp3, tmp3, t0, t0); -+ cadc(tmp4, tmp4, tmp, t0); -+ adc(carry, carry2, zr, t0); -+ cad(tmp4, tmp4, t1, carry2); -+ adc(carry, carry, zr, carry2); ++}; + -+ ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian -+ ror_imm(tmp4, tmp4, 32); -+ sd(tmp4, Address(tmp6, 0)); -+ sd(tmp3, Address(tmp6, wordSize)); ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } ++#endif + -+ j(L_third_loop); ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++ private: ++ MacroAssembler* _masm; ++ Label _label; + -+ bind(L_third_loop_exit); ++ public: ++ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); ++ ~SkipIfEqual(); ++}; + -+ andi(idx, idx, 0x3); -+ beqz(idx, L_post_third_loop_done); ++#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp +new file mode 100644 +index 0000000000..ef968ccd96 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ Label L_check_1; -+ subw(idx, idx, 2); -+ bltz(idx, L_check_1); ++#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP ++#define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(yz_idx1, Address(t0, 0)); -+ ror_imm(yz_idx1, yz_idx1, 32); ++// Still empty. 
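The SkipIfEqual helper declared above is an RAII guard: code emitted between its construction and destruction is jumped over at run time when *flag_addr equals the given value. A usage sketch (editor's illustration, not from the patch; the surrounding routine and the DTraceMethodProbes flag are assumed for the example):

// Emit a probe that only runs when the DTraceMethodProbes flag is true.
void emit_guarded_probe(MacroAssembler* masm) {
  SkipIfEqual skip(masm, &DTraceMethodProbes, false);
  // ... instructions emitted here are skipped when DTraceMethodProbes == false ...
}  // ~SkipIfEqual binds the label the constructor branched to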
+ -+ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 -+ mulhu(tmp4, product_hi, yz_idx1); ++#endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +new file mode 100644 +index 0000000000..fd907f77af +--- /dev/null ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +@@ -0,0 +1,450 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ ld(yz_idx2, Address(t0, 0)); -+ ror_imm(yz_idx2, yz_idx2, 32, tmp); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/flags/flagSetting.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" + -+ add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); ++#define __ _masm-> + -+ ror_imm(tmp3, tmp3, 32, tmp); -+ sd(tmp3, Address(t0, 0)); ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif + -+ bind(L_check_1); ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + -+ andi(idx, idx, 0x1); -+ subw(idx, idx, 1); -+ bltz(idx, L_post_third_loop_done); -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ lwu(tmp4, Address(t0, 0)); -+ mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 -+ mulhu(carry2, tmp4, product_hi); ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ assert_cond(_masm != NULL); ++ if (VerifyMethodHandles) { ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ } ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++} + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ lwu(tmp4, Address(t0, 0)); ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //PRODUCT + -+ add2_with_carry(carry2, carry2, tmp3, tmp4, 
carry, t0); ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) { ++ assert_cond(_masm != NULL); ++ InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); ++ Klass* klass = SystemDictionary::well_known_klass(klass_id); ++ Register temp = t1; ++ Register temp2 = t0; // used by MacroAssembler::cmpptr ++ Label L_ok, L_bad; ++ BLOCK_COMMENT("verify_klass {"); ++ __ verify_oop(obj); ++ __ beqz(obj, L_bad); ++ __ push_reg(RegSet::of(temp, temp2), sp); ++ __ load_klass(temp, obj); ++ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); ++ intptr_t super_check_offset = klass->super_check_offset(); ++ __ ld(temp, Address(temp, super_check_offset)); ++ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); ++ __ pop_reg(RegSet::of(temp, temp2), sp); ++ __ bind(L_bad); ++ __ stop(error_message); ++ __ BIND(L_ok); ++ __ pop_reg(RegSet::of(temp, temp2), sp); ++ BLOCK_COMMENT("} verify_klass"); ++} + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ sw(tmp3, Address(t0, 0)); ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {} + -+ slli(t0, carry2, 32); -+ srli(carry, tmp3, 32); -+ orr(carry, carry, t0); ++#endif //ASSERT + -+ bind(L_post_third_loop_done); -+} ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert_cond(_masm != NULL); ++ assert(method == xmethod, "interpreter calling convention"); ++ Label L_no_such_method; ++ __ beqz(xmethod, L_no_such_method); ++ __ verify_method_ptr(method); + -+/** -+ * Code for BigInteger::multiplyToLen() intrinsic. -+ * -+ * x10: x -+ * x11: xlen -+ * x12: y -+ * x13: ylen -+ * x14: z -+ * x15: zlen -+ * x16: tmp1 -+ * x17: tmp2 -+ * x7: tmp3 -+ * x28: tmp4 -+ * x29: tmp5 -+ * x30: tmp6 -+ * x31: tmp7 -+ */ -+void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, -+ Register z, Register zlen, -+ Register tmp1, Register tmp2, Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, Register product_hi) -+{ -+ assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. + -+ const Register idx = tmp1; -+ const Register kdx = tmp2; -+ const Register xstart = tmp3; ++ __ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); ++ __ beqz(t0, run_compiled_code); ++ __ ld(t0, Address(method, Method::interpreter_entry_offset())); ++ __ jr(t0); ++ __ BIND(run_compiled_code); ++ } + -+ const Register y_idx = tmp4; -+ const Register carry = tmp5; -+ const Register product = xlen; -+ const Register x_xstart = zlen; // reuse register ++ const ByteSize entry_offset = for_compiler_entry ? 
Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld(t0,Address(method, entry_offset)); ++ __ jr(t0); ++ __ bind(L_no_such_method); ++ __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry())); ++} + -+ mv(idx, ylen); // idx = ylen; -+ mv(kdx, zlen); // kdx = xlen+ylen; -+ mv(carry, zr); // carry = 0; ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ assert_cond(_masm != NULL); ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == xmethod, "required register for loading method"); + -+ Label L_multiply_64_x_64_loop, L_done; ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); + -+ subw(xstart, xlen, 1); -+ bltz(xstart, L_done); ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), /*is_signed*/ false); ++ Label L; ++ __ ld(t0, __ argument_address(temp2, -1)); ++ __ beq(recv, t0, L); ++ __ ld(x10, __ argument_address(temp2, -1)); ++ __ ebreak(); ++ __ BIND(L); ++ } + -+ const Register jdx = tmp1; ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} + -+ if (AvoidUnalignedAccesses) { -+ // Check if x and y are both 8-byte aligned. -+ orr(t0, xlen, ylen); -+ andi(t0, t0, 0x1); -+ beqz(t0, L_multiply_64_x_64_loop); ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ assert_cond(_masm != NULL); ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. 
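jump_to_lambda_form above resolves the invoker by chasing four heap references before it reaches a Method*. A hypothetical C++ model of that chain (editor's sketch; the struct and field names only mirror the java.lang.invoke objects named in the loads, they are not HotSpot types):

struct Method;                                        // the Method* finally jumped to
struct ResolvedMethodName { Method* vmtarget; };
struct MemberName         { ResolvedMethodName* method; };
struct LambdaForm         { MemberName* vmentry; };
struct MethodHandle       { LambdaForm* form; };

// MH -> MH.form -> LF.vmentry -> MemberName.method -> vmtarget
static Method* resolve_invoker(const MethodHandle* mh) {
  return mh->form->vmentry->method->vmtarget;
}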
++ __ ebreak(); // empty stubs make SG sick ++ return NULL; ++ } + -+ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++ // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) ++ // xmethod: Method* ++ // x13: argument locator (parameter slot count, added to sp) ++ // x11: used as temp to hold mh or receiver ++ // x10, x29: garbage temps, blown away ++ Register argp = x13; // argument list ptr, live on error paths ++ Register mh = x11; // MH receiver; dies quickly and is recycled + -+ Label L_second_loop_unaligned; -+ bind(L_second_loop_unaligned); -+ mv(carry, zr); -+ mv(jdx, ylen); -+ subw(xstart, xstart, 1); -+ bltz(xstart, L_done); -+ sub(sp, sp, 2 * wordSize); -+ sd(z, Address(sp, 0)); -+ sd(zr, Address(sp, wordSize)); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ addi(z, t0, 4); -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ lwu(product, Address(t0, 0)); -+ Label L_third_loop, L_third_loop_exit; ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); + -+ blez(jdx, L_third_loop_exit); ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + -+ bind(L_third_loop); -+ subw(jdx, jdx, 1); -+ shadd(t0, jdx, y, t0, LogBytesPerInt); -+ lwu(t0, Address(t0, 0)); -+ mul(t1, t0, product); -+ add(t0, t1, carry); -+ shadd(tmp6, jdx, z, t1, LogBytesPerInt); -+ lwu(t1, Address(tmp6, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(tmp6, 0)); -+ srli(carry, t0, 32); -+ bgtz(jdx, L_third_loop); ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ lhu(t0, Address(xmethod, Method::intrinsic_id_offset_in_bytes())); ++ __ mv(t1, (int) iid); ++ __ beq(t0, t1, L); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ ebreak(); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } + -+ bind(L_third_loop_exit); -+ ld(z, Address(sp, 0)); -+ addi(sp, sp, 2 * wordSize); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++ // First task: Find out how big the argument list is. ++ Address x13_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld(argp, Address(xmethod, Method::const_offset())); ++ __ load_sized_value(argp, ++ Address(argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), /*is_signed*/ false); ++ x13_first_arg_addr = __ argument_address(argp, -1); ++ } else { ++ DEBUG_ONLY(argp = noreg); ++ } + -+ j(L_second_loop_unaligned); ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld(mh, x13_first_arg_addr); ++ DEBUG_ONLY(argp = noreg); + } + -+ bind(L_multiply_64_x_64_loop); -+ multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); ++ // x13_first_arg_addr is live! + -+ Label L_second_loop_aligned; -+ beqz(kdx, L_second_loop_aligned); ++ trace_method_handle_interpreter_entry(_masm, iid); ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. 
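In the multiply_to_len body being dropped above, the AvoidUnalignedAccesses guard takes the 8-byte-per-iteration loop only when both input lengths are even, which it tests with a single or/and pair. The same test in C++ (editor's sketch):

#include <cstdint>

// (xlen | ylen) has bit 0 set iff at least one length is odd, so this mirrors
// the orr + andi + beqz sequence that branches to L_multiply_64_x_64_loop.
static bool both_lengths_even(int64_t xlen, int64_t ylen) {
  return ((xlen | ylen) & 0x1) == 0;
}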
++ Register recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. ++ __ ld(recv = x12, x13_first_arg_addr); ++ } ++ DEBUG_ONLY(argp = noreg); ++ Register xmember = xmethod; // MemberName ptr; incoming method ptr is dead now ++ __ pop_reg(xmember); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, recv, xmember, not_for_compiler_entry); ++ } + -+ Label L_carry; -+ subw(kdx, kdx, 1); -+ beqz(kdx, L_carry); ++ return entry_point; ++} + -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); -+ srli(carry, carry, 32); -+ subw(kdx, kdx, 1); + -+ bind(L_carry); -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert_cond(_masm != NULL); ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register temp1 = x7; ++ Register temp2 = x28; ++ Register temp3 = x29; // x30 is live by this point: it contains the sender SP ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++ } + -+ // Second and third (nested) loops. -+ // -+ // for (int i = xstart-1; i >= 0; i--) { // Second loop -+ // carry = 0; -+ // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop -+ // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + -+ // (z[k] & LONG_MASK) + carry; -+ // z[k] = (int)product; -+ // carry = product >>> 32; -+ // } -+ // z[i] = (int)carry; -+ // } -+ // -+ // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); + -+ bind(L_second_loop_aligned); -+ mv(carry, zr); // carry = 0; -+ mv(jdx, ylen); // j = ystart+1 ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); ++ } else { ++ // The method is a member invoker used by direct method handles. 
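Back in the removed multiply_to_len body, the second and third (nested) loops follow the Java pseudocode quoted above. A reference model over 32-bit digits (editor's sketch, not code from the patch; the assembly accumulates the same values but walks 64 and 128 bits at a time):

#include <cstdint>

static void multiply_remaining_ref(const uint32_t* x, int xstart,
                                   const uint32_t* y, int ystart,
                                   uint32_t* z) {
  for (int i = xstart - 1; i >= 0; i--) {                              // second loop
    uint64_t carry = 0;
    for (int jdx = ystart, k = ystart + 1 + i; jdx >= 0; jdx--, k--) { // third loop
      uint64_t product = (uint64_t)y[jdx] * x[i] + z[k] + carry;
      z[k]  = (uint32_t)product;   // low 32 bits back into z
      carry = product >> 32;       // high 32 bits carried to the next digit
    }
    z[i] = (uint32_t)carry;
  }
}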
++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } + -+ subw(xstart, xstart, 1); // i = xstart-1; -+ bltz(xstart, L_done); ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); ++ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); + -+ sub(sp, sp, 4 * wordSize); -+ sd(z, Address(sp, 0)); ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ ebreak(); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } + -+ Label L_last_x; -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ addi(z, t0, 4); -+ subw(xstart, xstart, 1); // i = xstart-1; -+ bltz(xstart, L_last_x); ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ // x30 - interpreter linkage (if interpreted) ++ // x11 ... 
x10 - compiler arguments (if compiled) + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ ld(product_hi, Address(t0, 0)); -+ ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(xmethod, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); ++ break; + -+ Label L_third_loop_prologue; -+ bind(L_third_loop_prologue); ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(xmethod, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); ++ break; + -+ sd(ylen, Address(sp, wordSize)); -+ sd(x, Address(sp, 2 * wordSize)); -+ sd(xstart, Address(sp, 3 * wordSize)); -+ multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, -+ tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); -+ ld(z, Address(sp, 0)); -+ ld(ylen, Address(sp, wordSize)); -+ ld(x, Address(sp, 2 * wordSize)); -+ ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen -+ addi(sp, sp, 4 * wordSize); ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: + -+ addiw(tmp3, xlen, 1); -+ shadd(t0, tmp3, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } + -+ subw(tmp3, tmp3, 1); -+ bltz(tmp3, L_done); ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); + -+ srli(carry, carry, 32); -+ shadd(t0, tmp3, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); -+ j(L_second_loop_aligned); ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ bgez(temp2_index, L_index_ok); ++ __ ebreak(); ++ __ BIND(L_index_ok); ++ } + -+ // Next infrequent code is moved outside loops. -+ bind(L_last_x); -+ lwu(product_hi, Address(x, 0)); -+ j(L_third_loop_prologue); ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. + -+ bind(L_done); -+} -+#endif ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, xmethod); ++ break; ++ } + -+// Count bits of trailing zero chars from lsb to msb until first non-zero element. -+// For LL case, one byte for one element, so shift 8 bits once, and for other case, -+// shift 16 bits once. -+void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) -+{ -+ if (UseRVB) { -+ assert_different_registers(Rd, Rs, tmp1); -+ int step = isLL ? 8 : 16; -+ ctz(Rd, Rs); -+ andi(tmp1, Rd, step - 1); -+ sub(Rd, Rd, tmp1); -+ return; -+ } -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ Label Loop; -+ int step = isLL ? 
8 : 16; -+ li(Rd, -step); -+ mv(tmp2, Rs); ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } + -+ bind(Loop); -+ addi(Rd, Rd, step); -+ andi(tmp1, tmp2, ((1 << step) - 1)); -+ srli(tmp2, tmp2, step); -+ beqz(tmp1, Loop); -+} ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); + -+// This instruction reads adjacent 4 bytes from the lower half of source register, -+// inflate into a register, for example: -+// Rs: A7A6A5A4A3A2A1A0 -+// Rd: 00A300A200A100A0 -+void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) -+{ -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ li(tmp1, 0xFF); -+ mv(Rd, zr); -+ for (int i = 0; i <= 3; i++) -+ { -+ andr(tmp2, Rs, tmp1); -+ if (i) { -+ slli(tmp2, tmp2, i * 8); -+ } -+ orr(Rd, Rd, tmp2); -+ if (i != 3) { -+ slli(tmp1, tmp1, 8); ++ Register rindex = xmethod; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rindex, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ bgez(rindex, L); ++ __ ebreak(); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rindex, xmethod, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); ++ break; + } -+ } -+} + -+// This instruction reads adjacent 4 bytes from the upper half of source register, -+// inflate into a register, for example: -+// Rs: A7A6A5A4A3A2A1A0 -+// Rd: 00A700A600A500A4 -+void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) -+{ -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ li(tmp1, 0xFF00000000); -+ mv(Rd, zr); -+ for (int i = 0; i <= 3; i++) -+ { -+ andr(tmp2, Rs, tmp1); -+ orr(Rd, Rd, tmp2); -+ srli(Rd, Rd, 8); -+ if (i != 3) { -+ slli(tmp1, tmp1, 8); ++ // live at this point: xmethod, x30 (if interpreted) ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r2_recv be shifted out. ++ __ verify_method_ptr(xmethod); ++ jump_from_method_handle(_masm, xmethod, temp1, for_compiler_entry); ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); + } + } ++ +} + -+// The size of the blocks erased by the zero_blocks stub. We must -+// handle anything smaller than this ourselves in zero_words(). -+const int MacroAssembler::zero_words_block_size = 8; ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oopDesc* mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { } + -+// zero_words() is used by C2 ClearArray patterns. It is as small as -+// possible, handling small word counts locally and delegating -+// anything larger to the zero_blocks stub. It is expanded many times -+// in compiled code, so it is important to keep it short. 
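inflate_lo32 and inflate_hi32 above widen four packed bytes into four 16-bit lanes, the Latin-1 to UTF-16 inflation used by the string intrinsics; the A7..A0 examples in their comments correspond to this reference model (editor's sketch, not code from the patch):

#include <cstdint>

static uint64_t inflate_lo32_ref(uint64_t rs) {   // ....A3A2A1A0 -> 00A3 00A2 00A1 00A0
  uint64_t rd = 0;
  for (int i = 0; i < 4; i++) {
    rd |= ((rs >> (8 * i)) & 0xFF) << (16 * i);
  }
  return rd;
}

static uint64_t inflate_hi32_ref(uint64_t rs) {   // A7A6A5A4.... -> 00A7 00A6 00A5 00A4
  return inflate_lo32_ref(rs >> 32);
}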
++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { } + -+// ptr: Address of a buffer to be zeroed. -+// cnt: Count in HeapWords. -+// -+// ptr, cnt, and t0 are clobbered. -+address MacroAssembler::zero_words(Register ptr, Register cnt) -+{ -+ assert(is_power_of_2(zero_words_block_size), "adjust this"); -+ assert(ptr == x28 && cnt == x29, "mismatch in register usage"); -+ assert_different_registers(cnt, t0); -+ -+ BLOCK_COMMENT("zero_words {"); -+ mv(t0, zero_words_block_size); -+ Label around, done, done16; -+ bltu(cnt, t0, around); -+ { -+ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); -+ assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); -+ if (StubRoutines::riscv::complete()) { -+ address tpc = trampoline_call(zero_blocks); -+ if (tpc == NULL) { -+ DEBUG_ONLY(reset_labels(around)); -+ postcond(pc() == badAddress); -+ return NULL; -+ } -+ } else { -+ jal(zero_blocks); -+ } -+ } -+ bind(around); -+ for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { -+ Label l; -+ andi(t0, cnt, i); -+ beqz(t0, l); -+ for (int j = 0; j < i; j++) { -+ sd(zr, Address(ptr, 0)); -+ addi(ptr, ptr, 8); -+ } -+ bind(l); -+ } -+ { -+ Label l; -+ andi(t0, cnt, 1); -+ beqz(t0, l); -+ sd(zr, Address(ptr, 0)); -+ bind(l); -+ } -+ BLOCK_COMMENT("} zero_words"); -+ postcond(pc() != badAddress); -+ return pc(); -+} -+ -+#define SmallArraySize (18 * BytesPerLong) -+ -+// base: Address of a buffer to be zeroed, 8 bytes aligned. -+// cnt: Immediate count in HeapWords. -+void MacroAssembler::zero_words(Register base, u_int64_t cnt) -+{ -+ assert_different_registers(base, t0, t1); -+ -+ BLOCK_COMMENT("zero_words {"); -+ -+ if (cnt <= SmallArraySize / BytesPerLong) { -+ for (int i = 0; i < (int)cnt; i++) { -+ sd(zr, Address(base, i * wordSize)); -+ } -+ } else { -+ const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll -+ int remainder = cnt % unroll; -+ for (int i = 0; i < remainder; i++) { -+ sd(zr, Address(base, i * wordSize)); -+ } -+ -+ Label loop; -+ Register cnt_reg = t0; -+ Register loop_base = t1; -+ cnt = cnt - remainder; -+ li(cnt_reg, cnt); -+ add(loop_base, base, remainder * wordSize); -+ bind(loop); -+ sub(cnt_reg, cnt_reg, unroll); -+ for (int i = 0; i < unroll; i++) { -+ sd(zr, Address(loop_base, i * wordSize)); -+ } -+ add(loop_base, loop_base, unroll * wordSize); -+ bnez(cnt_reg, loop); -+ } -+ -+ BLOCK_COMMENT("} zero_words"); -+} -+ -+// base: Address of a buffer to be filled, 8 bytes aligned. -+// cnt: Count in 8-byte unit. -+// value: Value to be filled with. -+// base will point to the end of the buffer after filling. -+void MacroAssembler::fill_words(Register base, Register cnt, Register value) -+{ -+// Algorithm: -+// -+// t0 = cnt & 7 -+// cnt -= t0 -+// p += t0 -+// switch (t0): -+// switch start: -+// do while cnt -+// cnt -= 8 -+// p[-8] = value -+// case 7: -+// p[-7] = value -+// case 6: -+// p[-6] = value -+// // ... 
-+// case 1: -+// p[-1] = value -+// case 0: -+// p += 8 -+// do-while end -+// switch end -+ -+ assert_different_registers(base, cnt, value, t0, t1); -+ -+ Label fini, skip, entry, loop; -+ const int unroll = 8; // Number of sd instructions we'll unroll -+ -+ beqz(cnt, fini); -+ -+ andi(t0, cnt, unroll - 1); -+ sub(cnt, cnt, t0); -+ // align 8, so first sd n % 8 = mod, next loop sd 8 * n. -+ shadd(base, t0, base, t1, 3); -+ la(t1, entry); -+ slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) -+ sub(t1, t1, t0); -+ jr(t1); -+ -+ bind(loop); -+ add(base, base, unroll * 8); -+ for (int i = -unroll; i < 0; i++) { -+ sd(value, Address(base, i * 8)); -+ } -+ bind(entry); -+ sub(cnt, cnt, unroll); -+ bgez(cnt, loop); -+ -+ bind(fini); -+} -+ -+#define FCVT_SAFE(FLOATCVT, FLOATEQ) \ -+void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ -+ Label L_Okay; \ -+ fscsr(zr); \ -+ FLOATCVT(dst, src); \ -+ frcsr(tmp); \ -+ andi(tmp, tmp, 0x1E); \ -+ beqz(tmp, L_Okay); \ -+ FLOATEQ(tmp, src, src); \ -+ bnez(tmp, L_Okay); \ -+ mv(dst, zr); \ -+ bind(L_Okay); \ -+} -+ -+FCVT_SAFE(fcvt_w_s, feq_s) -+FCVT_SAFE(fcvt_l_s, feq_s) -+FCVT_SAFE(fcvt_w_d, feq_d) -+FCVT_SAFE(fcvt_l_d, feq_d) -+ -+#undef FCVT_SAFE -+ -+#define FCMP(FLOATTYPE, FLOATSIG) \ -+void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ -+ FloatRegister Rs2, int unordered_result) { \ -+ Label Ldone; \ -+ if (unordered_result < 0) { \ -+ /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ -+ /* installs 1 if gt else 0 */ \ -+ flt_##FLOATSIG(result, Rs2, Rs1); \ -+ /* Rs1 > Rs2, install 1 */ \ -+ bgtz(result, Ldone); \ -+ feq_##FLOATSIG(result, Rs1, Rs2); \ -+ addi(result, result, -1); \ -+ /* Rs1 = Rs2, install 0 */ \ -+ /* NaN or Rs1 < Rs2, install -1 */ \ -+ bind(Ldone); \ -+ } else { \ -+ /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ -+ /* installs 1 if gt or unordered else 0 */ \ -+ flt_##FLOATSIG(result, Rs1, Rs2); \ -+ /* Rs1 < Rs2, install -1 */ \ -+ bgtz(result, Ldone); \ -+ feq_##FLOATSIG(result, Rs1, Rs2); \ -+ addi(result, result, -1); \ -+ /* Rs1 = Rs2, install 0 */ \ -+ /* NaN or Rs1 > Rs2, install 1 */ \ -+ bind(Ldone); \ -+ neg(result, result); \ -+ } \ -+} -+ -+FCMP(float, s); -+FCMP(double, d); -+ -+#undef FCMP -+ -+// Zero words; len is in bytes -+// Destroys all registers except addr -+// len must be a nonzero multiple of wordSize -+void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { -+ assert_different_registers(addr, len, tmp, t0, t1); -+ -+#ifdef ASSERT -+ { -+ Label L; -+ andi(t0, len, BytesPerWord - 1); -+ beqz(t0, L); -+ stop("len is not a multiple of BytesPerWord"); -+ bind(L); -+ } -+#endif // ASSERT -+ -+#ifndef PRODUCT -+ block_comment("zero memory"); -+#endif // PRODUCT -+ -+ Label loop; -+ Label entry; -+ -+ // Algorithm: -+ // -+ // t0 = cnt & 7 -+ // cnt -= t0 -+ // p += t0 -+ // switch (t0) { -+ // do { -+ // cnt -= 8 -+ // p[-8] = 0 -+ // case 7: -+ // p[-7] = 0 -+ // case 6: -+ // p[-6] = 0 -+ // ... 
-+ // case 1: -+ // p[-1] = 0 -+ // case 0: -+ // p += 8 -+ // } while (cnt) -+ // } -+ -+ const int unroll = 8; // Number of sd(zr) instructions we'll unroll -+ -+ srli(len, len, LogBytesPerWord); -+ andi(t0, len, unroll - 1); // t0 = cnt % unroll -+ sub(len, len, t0); // cnt -= unroll -+ // tmp always points to the end of the region we're about to zero -+ shadd(tmp, t0, addr, t1, LogBytesPerWord); -+ la(t1, entry); -+ slli(t0, t0, 2); -+ sub(t1, t1, t0); -+ jr(t1); -+ bind(loop); -+ sub(len, len, unroll); -+ for (int i = -unroll; i < 0; i++) { -+ Assembler::sd(zr, Address(tmp, i * wordSize)); -+ } -+ bind(entry); -+ add(tmp, tmp, unroll * wordSize); -+ bnez(len, loop); -+} -+ -+// shift left by shamt and add -+// Rd = (Rs1 << shamt) + Rs2 -+void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { -+ if (UseRVB) { -+ if (shamt == 1) { -+ sh1add(Rd, Rs1, Rs2); -+ return; -+ } else if (shamt == 2) { -+ sh2add(Rd, Rs1, Rs2); -+ return; -+ } else if (shamt == 3) { -+ sh3add(Rd, Rs1, Rs2); -+ return; -+ } -+ } ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { } ++#endif //PRODUCT +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +new file mode 100644 +index 0000000000..65493eba76 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ if (shamt != 0) { -+ slli(tmp, Rs1, shamt); -+ add(Rd, Rs2, tmp); -+ } else { -+ add(Rd, Rs1, Rs2); -+ } -+} ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. 
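The FCVT_SAFE and FCMP macros defined a little above encode the Java-visible rules for float conversion and comparison: converting NaN must give 0 (the saturating fcvt hardware result is kept for everything else), and an unordered comparison collapses onto the sign chosen by unordered_result. A behavioural model (editor's sketch, not code from the patch):

#include <cmath>
#include <cstdint>
#include <limits>

static int64_t fcvt_l_d_safe_ref(double src) {
  if (std::isnan(src)) return 0;                    // NaN converts to 0
  if (src >= (double)std::numeric_limits<int64_t>::max()) return std::numeric_limits<int64_t>::max();
  if (src <= (double)std::numeric_limits<int64_t>::min()) return std::numeric_limits<int64_t>::min();
  return (int64_t)src;                              // in-range values convert directly
}

static int double_compare_ref(double a, double b, int unordered_result) {
  if (std::isnan(a) || std::isnan(b)) return unordered_result < 0 ? -1 : 1;
  return a < b ? -1 : (a > b ? 1 : 0);
}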
+ -+void MacroAssembler::zero_extend(Register dst, Register src, int bits) { -+ if (UseRVB) { -+ if (bits == 16) { -+ zext_h(dst, src); -+ return; -+ } else if (bits == 32) { -+ zext_w(dst, src); -+ return; -+ } -+ } ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 120000) ++}; + -+ if (bits == 8) { -+ zext_b(dst, src); -+ } else { -+ slli(dst, src, XLEN - bits); -+ srli(dst, dst, XLEN - bits); -+ } -+} ++public: + -+void MacroAssembler::sign_extend(Register dst, Register src, int bits) { -+ if (UseRVB) { -+ if (bits == 8) { -+ sext_b(dst, src); -+ return; -+ } else if (bits == 16) { -+ sext_h(dst, src); -+ return; -+ } -+ } ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + -+ if (bits == 32) { -+ sext_w(dst, src); -+ } else { -+ slli(dst, src, XLEN - bits); -+ srai(dst, dst, XLEN - bits); -+ } -+} ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + -+void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) -+{ -+ if (src1 == src2) { -+ mv(dst, zr); -+ return; -+ } -+ Label done; -+ Register left = src1; -+ Register right = src2; -+ if (dst == src1) { -+ assert_different_registers(dst, src2, tmp); -+ mv(tmp, src1); -+ left = tmp; -+ } else if (dst == src2) { -+ assert_different_registers(dst, src1, tmp); -+ mv(tmp, src2); -+ right = tmp; ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); + } + -+ // installs 1 if gt else 0 -+ slt(dst, right, left); -+ bnez(dst, done); -+ slt(dst, left, right); -+ // dst = -1 if lt; else if eq , dst = 0 -+ neg(dst, dst); -+ bind(done); -+} ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; + -+void MacroAssembler::safepoint_ifence() { -+ ifence(); -+#ifndef PRODUCT -+ if (VerifyCrossModifyFence) { -+ // Clear the thread state. -+ sb(zr, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); -+ } -+#endif -+} ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); + -+#ifndef PRODUCT -+void MacroAssembler::verify_cross_modify_fence_not_required() { -+ if (VerifyCrossModifyFence) { -+ // Check if thread needs a cross modify fence. -+ lbu(t0, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); -+ Label fence_not_required; -+ beqz(t0, fence_not_required); -+ // If it does then fail. 
-+ la(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure))); -+ mv(c_rarg0, xthread); -+ jalr(t0); -+ bind(fence_not_required); -+ } -+} -+#endif -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp new file mode 100644 -index 00000000000..23e09475be1 +index 0000000000..27011ad128 --- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -0,0 +1,858 @@ ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +@@ -0,0 +1,417 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. @@ -26571,1058 +25813,1014 @@ index 00000000000..23e09475be1 + * + */ + -+#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP -+#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/compiledIC.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/orderAccess.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" ++#endif + -+#include "asm/assembler.hpp" -+#include "metaprogramming/enableIf.hpp" -+#include "oops/compressedOops.hpp" -+#include "utilities/powerOfTwo.hpp" ++Register NativeInstruction::extract_rs1(address instr) { ++ assert_cond(instr != NULL); ++ return as_Register(Assembler::extract(((unsigned*)instr)[0], 19, 15)); ++} + -+// MacroAssembler extends Assembler by frequently used macros. -+// -+// Instructions for which a 'better' code sequence exists depending -+// on arguments should also go in here. ++Register NativeInstruction::extract_rs2(address instr) { ++ assert_cond(instr != NULL); ++ return as_Register(Assembler::extract(((unsigned*)instr)[0], 24, 20)); ++} + -+class MacroAssembler: public Assembler { ++Register NativeInstruction::extract_rd(address instr) { ++ assert_cond(instr != NULL); ++ return as_Register(Assembler::extract(((unsigned*)instr)[0], 11, 7)); ++} + -+ public: -+ MacroAssembler(CodeBuffer* code) : Assembler(code) { -+ } -+ virtual ~MacroAssembler() {} ++uint32_t NativeInstruction::extract_opcode(address instr) { ++ assert_cond(instr != NULL); ++ return Assembler::extract(((unsigned*)instr)[0], 6, 0); ++} + -+ void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod); ++uint32_t NativeInstruction::extract_funct3(address instr) { ++ assert_cond(instr != NULL); ++ return Assembler::extract(((unsigned*)instr)[0], 14, 12); ++} + -+ // Place a fence.i after code may have been modified due to a safepoint. 
-+ void safepoint_ifence(); ++bool NativeInstruction::is_pc_relative_at(address instr) { ++ // auipc + jalr ++ // auipc + addi ++ // auipc + load ++ // auipc + fload_load ++ return (is_auipc_at(instr)) && ++ (is_addi_at(instr + instruction_size) || ++ is_jalr_at(instr + instruction_size) || ++ is_load_at(instr + instruction_size) || ++ is_float_load_at(instr + instruction_size)) && ++ check_pc_relative_data_dependency(instr); ++} + -+ // Alignment -+ void align(int modulus, int extra_offset = 0); ++// ie:ld(Rd, Label) ++bool NativeInstruction::is_load_pc_relative_at(address instr) { ++ return is_auipc_at(instr) && // auipc ++ is_ld_at(instr + instruction_size) && // ld ++ check_load_pc_relative_data_dependency(instr); ++} + -+ // Stack frame creation/removal -+ // Note that SP must be updated to the right place before saving/restoring RA and FP -+ // because signal based thread suspend/resume could happen asynchronously. -+ void enter() { -+ addi(sp, sp, - 2 * wordSize); -+ sd(ra, Address(sp, wordSize)); -+ sd(fp, Address(sp)); -+ addi(fp, sp, 2 * wordSize); -+ } ++bool NativeInstruction::is_movptr_at(address instr) { ++ return is_lui_at(instr) && // Lui ++ is_addi_at(instr + instruction_size) && // Addi ++ is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 ++ is_addi_at(instr + instruction_size * 3) && // Addi ++ is_slli_shift_at(instr + instruction_size * 4, 6) && // Slli Rd, Rs, 6 ++ (is_addi_at(instr + instruction_size * 5) || ++ is_jalr_at(instr + instruction_size * 5) || ++ is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load ++ check_movptr_data_dependency(instr); ++} + -+ void leave() { -+ addi(sp, fp, - 2 * wordSize); -+ ld(fp, Address(sp)); -+ ld(ra, Address(sp, wordSize)); -+ addi(sp, sp, 2 * wordSize); -+ } ++bool NativeInstruction::is_li32_at(address instr) { ++ return is_lui_at(instr) && // lui ++ is_addiw_at(instr + instruction_size) && // addiw ++ check_li32_data_dependency(instr); ++} + ++bool NativeInstruction::is_li64_at(address instr) { ++ return is_lui_at(instr) && // lui ++ is_addi_at(instr + instruction_size) && // addi ++ is_slli_shift_at(instr + instruction_size * 2, 12) && // Slli Rd, Rs, 12 ++ is_addi_at(instr + instruction_size * 3) && // addi ++ is_slli_shift_at(instr + instruction_size * 4, 12) && // Slli Rd, Rs, 12 ++ is_addi_at(instr + instruction_size * 5) && // addi ++ is_slli_shift_at(instr + instruction_size * 6, 8) && // Slli Rd, Rs, 8 ++ is_addi_at(instr + instruction_size * 7) && // addi ++ check_li64_data_dependency(instr); ++} + -+ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) -+ // The pointer will be loaded into the thread register. -+ void get_thread(Register thread); ++void NativeCall::verify() { ++ assert(NativeCall::is_call_at((address)this), "unexpected code at call site"); ++} + -+ // Support for VM calls -+ // -+ // It is imperative that all calls into the VM are handled via the call_VM macros. -+ // They make sure that the stack linkage is setup correctly. call_VM's correspond -+ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
++address NativeCall::destination() const { ++ address addr = (address)this; ++ assert(NativeInstruction::is_jal_at(instruction_address()), "inst must be jal."); ++ address destination = MacroAssembler::target_addr_for_insn(instruction_address()); + -+ void call_VM(Register oop_result, -+ address entry_point, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, Register arg_2, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, Register arg_2, Register arg_3, -+ bool check_exceptions = true); ++ // Do we use a trampoline stub for this call? ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ if (nm != NULL && nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } + -+ // Overloadings with last_Java_sp -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments = 0, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, Register arg_2, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, Register arg_2, Register arg_3, -+ bool check_exceptions = true); ++ return destination; ++} + -+ void get_vm_result(Register oop_result, Register java_thread); -+ void get_vm_result_2(Register metadata_result, Register java_thread); ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. 
++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ "concurrent code patching"); + -+ // These always tightly bind to MacroAssembler::call_VM_leaf_base -+ // bypassing the virtual implementation -+ void call_VM_leaf(address entry_point, -+ int number_of_arguments = 0); -+ void call_VM_leaf(address entry_point, -+ Register arg_0); -+ void call_VM_leaf(address entry_point, -+ Register arg_0, Register arg_1); -+ void call_VM_leaf(address entry_point, -+ Register arg_0, Register arg_1, Register arg_2); ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); + -+ // These always tightly bind to MacroAssembler::call_VM_base -+ // bypassing the virtual implementation -+ void super_call_VM_leaf(address entry_point, Register arg_0); -+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1); -+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2); -+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3); ++ // Patch the constant in the call's trampoline stub. ++ address trampoline_stub_addr = get_trampoline(); ++ if (trampoline_stub_addr != NULL) { ++ assert (!is_NativeCallTrampolineStub_at(dest), "chained trampolines"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ } + -+ // last Java Frame (fills frame anchor) -+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp); -+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp); -+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp); ++ // Patch the call. 
++ if (Assembler::reachable_from_branch_at(addr_call, dest)) { ++ set_destination(dest); ++ } else { ++ assert (trampoline_stub_addr != NULL, "we need a trampoline"); ++ set_destination(trampoline_stub_addr); ++ } + -+ // thread in the default location (xthread) -+ void reset_last_Java_frame(bool clear_fp); ++ ICache::invalidate_range(addr_call, instruction_size); ++} + -+ void call_native(address entry_point, -+ Register arg_0); -+ void call_native_base( -+ address entry_point, // the entry point -+ Label* retaddr = NULL -+ ); ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); + -+ virtual void call_VM_leaf_base( -+ address entry_point, // the entry point -+ int number_of_arguments, // the number of arguments to pop after the call -+ Label* retaddr = NULL -+ ); ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); + -+ virtual void call_VM_leaf_base( -+ address entry_point, // the entry point -+ int number_of_arguments, // the number of arguments to pop after the call -+ Label& retaddr) { -+ call_VM_leaf_base(entry_point, number_of_arguments, &retaddr); ++ address jal_destination = MacroAssembler::pd_call_destination(call_addr); ++ if (code != NULL && code->contains(jal_destination) && is_NativeCallTrampolineStub_at(jal_destination)) { ++ return jal_destination; + } + -+ virtual void call_VM_base( // returns the register containing the thread upon return -+ Register oop_result, // where an oop-result ends up if any; use noreg otherwise -+ Register java_thread, // the thread if computed before ; use noreg otherwise -+ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise -+ address entry_point, // the entry point -+ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call -+ bool check_exceptions // whether to check for pending exceptions after return -+ ); ++ if (code != NULL && code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } + -+ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions); ++ return NULL; ++} + -+ virtual void check_and_handle_earlyret(Register java_thread); -+ virtual void check_and_handle_popframe(Register java_thread); ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { Unimplemented(); } + -+ void resolve_weak_handle(Register result, Register tmp); -+ void resolve_oop_handle(Register result, Register tmp = x15); -+ void resolve_jobject(Register value, Register thread, Register tmp); ++//------------------------------------------------------------------- + -+ void movoop(Register dst, jobject obj, bool immediate = false); -+ void mov_metadata(Register dst, Metadata* obj); -+ void bang_stack_size(Register size, Register tmp); -+ void set_narrow_oop(Register dst, jobject obj); -+ void set_narrow_klass(Register dst, Klass* k); ++void NativeMovConstReg::verify() { ++ if (!(nativeInstruction_at(instruction_address())->is_movptr() || ++ is_auipc_at(instruction_address()))) { ++ fatal("should be MOVPTR or AUIPC"); ++ } ++} + -+ void load_mirror(Register dst, Register method, Register tmp = x15); -+ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, -+ Address src, Register tmp1, Register thread_tmp); -+ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, -+ Register src, Register tmp1, Register thread_tmp); 
-+ void load_klass(Register dst, Register src); -+ void store_klass(Register dst, Register src); -+ void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L); ++intptr_t NativeMovConstReg::data() const { ++ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); ++ if (maybe_cpool_ref(instruction_address())) { ++ return *(intptr_t*)addr; ++ } else { ++ return (intptr_t)addr; ++ } ++} + -+ void encode_klass_not_null(Register r); -+ void decode_klass_not_null(Register r); -+ void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); -+ void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); -+ void decode_heap_oop_not_null(Register r); -+ void decode_heap_oop_not_null(Register dst, Register src); -+ void decode_heap_oop(Register d, Register s); -+ void decode_heap_oop(Register r) { decode_heap_oop(r, r); } -+ void encode_heap_oop(Register d, Register s); -+ void encode_heap_oop(Register r) { encode_heap_oop(r, r); }; -+ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, -+ Register thread_tmp = noreg, DecoratorSet decorators = 0); -+ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, -+ Register thread_tmp = noreg, DecoratorSet decorators = 0); -+ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, -+ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++void NativeMovConstReg::set_data(intptr_t x) { ++ if (maybe_cpool_ref(instruction_address())) { ++ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); ++ *(intptr_t*)addr = x; ++ } else { ++ // Store x into the instruction stream. ++ MacroAssembler::pd_patch_instruction_size(instruction_address(), (address)x); ++ ICache::invalidate_range(instruction_address(), movptr_instruction_size); ++ } + -+ void store_klass_gap(Register dst, Register src); ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* cb = CodeCache::find_blob(instruction_address()); ++ nmethod* nm = cb->as_nmethod_or_null(); ++ if (nm != NULL) { ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(x); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)x; ++ break; ++ } ++ } ++ } ++} + -+ // currently unimplemented -+ // Used for storing NULL. All other oop constants should be -+ // stored using routines that take a jobject. -+ void store_heap_oop_null(Address dst); ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} + -+ // This dummy is to prevent a call to store_heap_oop from -+ // converting a zero (linke NULL) into a Register by giving -+ // the compiler two choices it can't resolve ++//------------------------------------------------------------------- + -+ void store_heap_oop(Address dst, void* dummy); ++int NativeMovRegMem::offset() const { ++ Unimplemented(); ++ return 0; ++} + -+ // Support for NULL-checks -+ // -+ // Generates code that causes a NULL OS exception if the content of reg is NULL. -+ // If the accessed location is M[reg + offset] and the offset is known, provide the -+ // offset. 
No explicit code generateion is needed if the offset is within a certain -+ // range (0 <= offset <= page_size). ++void NativeMovRegMem::set_offset(int x) { Unimplemented(); } + -+ virtual void null_check(Register reg, int offset = -1); -+ static bool needs_explicit_null_check(intptr_t offset); -+ static bool uses_implicit_null_check(void* address); ++void NativeMovRegMem::verify() { ++ Unimplemented(); ++} + -+ // idiv variant which deals with MINLONG as dividend and -1 as divisor -+ int corrected_idivl(Register result, Register rs1, Register rs2, -+ bool want_remainder); -+ int corrected_idivq(Register result, Register rs1, Register rs2, -+ bool want_remainder); ++//-------------------------------------------------------------------------------- + -+ // interface method calling -+ void lookup_interface_method(Register recv_klass, -+ Register intf_klass, -+ RegisterOrConstant itable_index, -+ Register method_result, -+ Register scan_tmp, -+ Label& no_such_interface, -+ bool return_method = true); ++void NativeJump::verify() { } + -+ // virtual method calling -+ // n.n. x86 allows RegisterOrConstant for vtable_index -+ void lookup_virtual_method(Register recv_klass, -+ RegisterOrConstant vtable_index, -+ Register method_result); + -+ // Form an addres from base + offset in Rd. Rd my or may not -+ // actually be used: you must use the Address that is returned. It -+ // is up to you to ensure that the shift provided mathces the size -+ // of your data. -+ Address form_address(Register Rd, Register base, long byte_offset); ++void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { ++} + -+ // allocation -+ void tlab_allocate( -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Register tmp2, // temp register -+ Label& slow_case, // continuation point of fast allocation fails -+ bool is_far = false -+ ); + -+ void eden_allocate( -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp, // temp register -+ Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false -+ ); ++address NativeJump::jump_destination() const { ++ address dest = MacroAssembler::target_addr_for_insn(instruction_address()); + -+ // Test sub_klass against super_klass, with fast and slow paths. ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about + -+ // The fast path produces a tri-state answer: yes / no / maybe-slow. -+ // One of the three labels can be NULL, meaning take the fall-through. -+ // If super_check_offset is -1, the value is loaded up from super_klass. -+ // No registers are killed, except tmp_reg -+ void check_klass_subtype_fast_path(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label* L_success, -+ Label* L_failure, -+ Label* L_slow_path, -+ Register super_check_offset = noreg); ++ // return -1 if jump to self ++ dest = (dest == (address) this) ? (address) -1 : dest; ++ return dest; ++}; + -+ // The reset of the type cehck; must be wired to a corresponding fast path. 
-+ // It does not repeat the fast path logic, so don't use it standalone. -+ // The tmp1_reg and tmp2_reg can be noreg, if no temps are avaliable. -+ // Updates the sub's secondary super cache as necessary. -+ void check_klass_subtype_slow_path(Register sub_klass, -+ Register super_klass, -+ Register tmp1_reg, -+ Register tmp2_reg, -+ Label* L_success, -+ Label* L_failure); ++void NativeJump::set_jump_destination(address dest) { ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about ++ if (dest == (address) -1) ++ dest = instruction_address(); + -+ void check_klass_subtype(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label& L_success); ++ MacroAssembler::pd_patch_instruction(instruction_address(), dest); ++ ICache::invalidate_range(instruction_address(), instruction_size); ++} + -+ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++//------------------------------------------------------------------- + -+ // only if +VerifyOops -+ void verify_oop(Register reg, const char* s = "broken oop"); -+ void verify_oop_addr(Address addr, const char* s = "broken oop addr"); ++address NativeGeneralJump::jump_destination() const { ++ NativeMovConstReg* move = nativeMovConstReg_at(instruction_address()); ++ address dest = (address) move->data(); + -+ void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {} -+ void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {} ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about + -+#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) -+#define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ // return -1 if jump to self ++ dest = (dest == (address) this) ? (address) -1 : dest; ++ return dest; ++} + -+ // A more convenient access to fence for our purposes -+ // We used four bit to indicate the read and write bits in the predecessors and successors, -+ // and extended i for r, o for w if UseConservativeFence enabled. -+ enum Membar_mask_bits { -+ StoreStore = 0b0101, // (pred = ow + succ = ow) -+ LoadStore = 0b1001, // (pred = ir + succ = ow) -+ StoreLoad = 0b0110, // (pred = ow + succ = ir) -+ LoadLoad = 0b1010, // (pred = ir + succ = ir) -+ AnyAny = LoadStore | StoreLoad // (pred = iorw + succ = iorw) -+ }; ++//------------------------------------------------------------------- + -+ void membar(uint32_t order_constraint); ++bool NativeInstruction::is_safepoint_poll() { ++ return is_lwu_to_zr(address(this)); ++} + -+ static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) { -+ predecessor = (order_constraint >> 2) & 0x3; -+ successor = order_constraint & 0x3; ++bool NativeInstruction::is_lwu_to_zr(address instr) { ++ assert_cond(instr != NULL); ++ return (extract_opcode(instr) == 0b0000011 && ++ extract_funct3(instr) == 0b110 && ++ extract_rd(instr) == zr); // zr ++} + -+ // extend rw -> iorw: -+ // 01(w) -> 0101(ow) -+ // 10(r) -> 1010(ir) -+ // 11(rw)-> 1111(iorw) -+ if (UseConservativeFence) { -+ predecessor |= predecessor << 2; -+ successor |= successor << 2; -+ } -+ } ++// A 16-bit instruction with all bits ones is permanently reserved as an illegal instruction. 
++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ // jvmci ++ return uint_at(0) == 0xffffffff; ++} + -+ static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) { -+ return ((predecessor & 0x3) << 2) | (successor & 0x3); -+ } ++void NativeIllegalInstruction::insert(address code_pos) { ++ assert_cond(code_pos != NULL); ++ *(juint*)code_pos = 0xffffffff; // all bits ones is permanently reserved as an illegal instruction ++} + -+ // prints msg, dumps registers and stops execution -+ void stop(const char* msg); ++bool NativeInstruction::is_stop() { ++ return uint_at(0) == 0xffffffff; // an illegal instruction ++} + -+ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++//------------------------------------------------------------------- + -+ void unimplemented(const char* what = ""); ++// MT-safe inserting of a jump over a jump or a nop (used by ++// nmethod::make_not_entrant_or_zombie) + -+ void should_not_reach_here() { stop("should not reach here"); } ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { + -+ static address target_addr_for_insn(address insn_addr); ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); + -+ // Required platform-specific helpers for Label::patch_instructions. -+ // They _shadow_ the declarations in AbstractAssembler, which are undefined. -+ static int pd_patch_instruction_size(address branch, address target); -+ static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) { -+ pd_patch_instruction_size(branch, target); -+ } -+ static address pd_call_destination(address branch) { -+ return target_addr_for_insn(branch); ++ assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() || ++ nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(), ++ "riscv cannot replace non-jump with jump"); ++ ++ // Patch this nmethod atomically. ++ if (Assembler::reachable_from_branch_at(verified_entry, dest)) { ++ ptrdiff_t offset = dest - verified_entry; ++ guarantee(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction."); // 1M ++ ++ uint32_t insn = 0; ++ address pInsn = (address)&insn; ++ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); ++ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); ++ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); ++ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); ++ Assembler::patch(pInsn, 11, 7, 0); // zero, no link jump ++ Assembler::patch(pInsn, 6, 0, 0b1101111); // j, (jal x0 offset) ++ *(unsigned int*)verified_entry = insn; ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie. 
++ NativeIllegalInstruction::insert(verified_entry); + } + -+ static int patch_oop(address insn_addr, address o); -+ address emit_trampoline_stub(int insts_call_instruction_offset, address target); -+ void emit_static_call_stub(); ++ ICache::invalidate_range(verified_entry, instruction_size); ++} + -+ // The following 4 methods return the offset of the appropriate move instruction ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ CodeBuffer cb(code_pos, instruction_size); ++ MacroAssembler a(&cb); + -+ // Support for fast byte/short loading with zero extension (depending on particular CPU) -+ int load_unsigned_byte(Register dst, Address src); -+ int load_unsigned_short(Register dst, Address src); ++ int32_t offset = 0; ++ a.movptr_with_offset(t0, entry, offset); // lui, addi, slli, addi, slli ++ a.jalr(x0, t0, offset); // jalr + -+ // Support for fast byte/short loading with sign extension (depending on particular CPU) -+ int load_signed_byte(Register dst, Address src); -+ int load_signed_short(Register dst, Address src); ++ ICache::invalidate_range(code_pos, instruction_size); ++} + -+ // Load and store values by size and signed-ness -+ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); -+ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++// MT-safe patching of a long jump instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ ShouldNotCallThis(); ++} + -+ public: -+ // Standard pseudoinstruction -+ void nop(); -+ void mv(Register Rd, Register Rs); -+ void notr(Register Rd, Register Rs); -+ void neg(Register Rd, Register Rs); -+ void negw(Register Rd, Register Rs); -+ void sext_w(Register Rd, Register Rs); -+ void zext_b(Register Rd, Register Rs); -+ void seqz(Register Rd, Register Rs); // set if = zero -+ void snez(Register Rd, Register Rs); // set if != zero -+ void sltz(Register Rd, Register Rs); // set if < zero -+ void sgtz(Register Rd, Register Rs); // set if > zero + -+ // Float pseudoinstruction -+ void fmv_s(FloatRegister Rd, FloatRegister Rs); -+ void fabs_s(FloatRegister Rd, FloatRegister Rs); // single-precision absolute value -+ void fneg_s(FloatRegister Rd, FloatRegister Rs); ++address NativeCallTrampolineStub::destination(nmethod *nm) const { ++ return ptr_at(data_offset); ++} + -+ // Double pseudoinstruction -+ void fmv_d(FloatRegister Rd, FloatRegister Rs); -+ void fabs_d(FloatRegister Rd, FloatRegister Rs); -+ void fneg_d(FloatRegister Rd, FloatRegister Rs); ++void NativeCallTrampolineStub::set_destination(address new_destination) { ++ set_ptr_at(data_offset, new_destination); ++ OrderAccess::release(); ++} + -+ // Pseudoinstruction for control and status register -+ void rdinstret(Register Rd); // read instruction-retired counter -+ void rdcycle(Register Rd); // read cycle counter -+ void rdtime(Register Rd); // read time -+ void csrr(Register Rd, unsigned csr); // read csr -+ void csrw(unsigned csr, Register Rs); // write csr -+ void csrs(unsigned csr, Register Rs); // set bits in csr -+ void csrc(unsigned csr, Register Rs); // clear bits in csr -+ void csrwi(unsigned csr, unsigned imm); -+ void csrsi(unsigned csr, unsigned imm); -+ void csrci(unsigned csr, unsigned imm); -+ void frcsr(Register Rd); // read float-point csr -+ void fscsr(Register Rd, Register Rs); // swap float-point csr -+ void fscsr(Register Rs); // write float-point csr -+ void frrm(Register Rd); // read 
float-point rounding mode -+ void fsrm(Register Rd, Register Rs); // swap float-point rounding mode -+ void fsrm(Register Rs); // write float-point rounding mode -+ void fsrmi(Register Rd, unsigned imm); -+ void fsrmi(unsigned imm); -+ void frflags(Register Rd); // read float-point exception flags -+ void fsflags(Register Rd, Register Rs); // swap float-point exception flags -+ void fsflags(Register Rs); // write float-point exception flags -+ void fsflagsi(Register Rd, unsigned imm); -+ void fsflagsi(unsigned imm); -+ -+ void beqz(Register Rs, const address &dest); -+ void bnez(Register Rs, const address &dest); -+ void blez(Register Rs, const address &dest); -+ void bgez(Register Rs, const address &dest); -+ void bltz(Register Rs, const address &dest); -+ void bgtz(Register Rs, const address &dest); -+ void la(Register Rd, Label &label); -+ void la(Register Rd, const address &dest); -+ void la(Register Rd, const Address &adr); -+ //label -+ void beqz(Register Rs, Label &l, bool is_far = false); -+ void bnez(Register Rs, Label &l, bool is_far = false); -+ void blez(Register Rs, Label &l, bool is_far = false); -+ void bgez(Register Rs, Label &l, bool is_far = false); -+ void bltz(Register Rs, Label &l, bool is_far = false); -+ void bgtz(Register Rs, Label &l, bool is_far = false); -+ void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ -+ void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } } -+ void pop_reg(RegSet regs, Register stack) { if (regs.bits()) { pop_reg(regs.bits(), stack); } } -+ void push_reg(Register Rs); -+ void pop_reg(Register Rd); -+ int push_reg(unsigned int bitset, Register stack); -+ int pop_reg(unsigned int bitset, Register stack); -+ void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } -+ void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } -+#ifdef COMPILER2 -+ void push_vp(VectorRegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); } -+ void pop_vp(VectorRegSet regs, Register stack) { if (regs.bits()) pop_vp(regs.bits(), stack); } -+#endif // COMPILER2 ++uint32_t NativeMembar::get_kind() { ++ uint32_t insn = 
uint_at(0); + -+ // Push and pop everything that might be clobbered by a native -+ // runtime call except t0 and t1. (They are always -+ // temporary registers, so we don't have to protect them.) -+ // Additional registers can be excluded in a passed RegSet. -+ void push_call_clobbered_registers_except(RegSet exclude); -+ void pop_call_clobbered_registers_except(RegSet exclude); ++ uint32_t predecessor = Assembler::extract(insn, 27, 24); ++ uint32_t successor = Assembler::extract(insn, 23, 20); + -+ void push_call_clobbered_registers() { -+ push_call_clobbered_registers_except(RegSet()); -+ } -+ void pop_call_clobbered_registers() { -+ pop_call_clobbered_registers_except(RegSet()); -+ } ++ return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor); ++} + -+ void pusha(); -+ void popa(); -+ void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0); -+ void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0); ++void NativeMembar::set_kind(uint32_t order_kind) { ++ uint32_t predecessor = 0; ++ uint32_t successor = 0; + -+ // if heap base register is used - reinit it with the correct value -+ void reinit_heapbase(); ++ MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor); + -+ void bind(Label& L) { -+ Assembler::bind(L); -+ // fences across basic blocks should not be merged -+ code()->clear_last_insn(); -+ } ++ uint32_t insn = uint_at(0); ++ address pInsn = (address) &insn; ++ Assembler::patch(pInsn, 27, 24, predecessor); ++ Assembler::patch(pInsn, 23, 20, successor); + -+ // mv -+ template::value)> -+ inline void mv(Register Rd, T o) { -+ li(Rd, (int64_t)o); -+ } ++ address membar = addr_at(0); ++ *(unsigned int*) membar = insn; ++} +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +new file mode 100644 +index 0000000000..2e5c84ee3b +--- /dev/null ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +@@ -0,0 +1,555 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } ++#ifndef CPU_RISCV_NATIVEINST_RISCV_HPP ++#define CPU_RISCV_NATIVEINST_RISCV_HPP + -+ void mv(Register Rd, Address dest); -+ void mv(Register Rd, address addr); -+ void mv(Register Rd, RegisterOrConstant src); ++#include "asm/assembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" + -+ // logic -+ void andrw(Register Rd, Register Rs1, Register Rs2); -+ void orrw(Register Rd, Register Rs1, Register Rs2); -+ void xorrw(Register Rd, Register Rs1, Register Rs2); ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovRegMem ++// - - NativeJump ++// - - NativeGeneralJump ++// - - NativeIllegalInstruction ++// - - NativeCallTrampolineStub ++// - - NativeMembar + -+ // revb -+ void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend -+ void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend -+ void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend -+ void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend -+ void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower -+ void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword -+ void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word -+ void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. 
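// --- Editorial sketch, not part of this patch -------------------------------
// Several of the pattern checks in this header (is_movptr_at and
// check_movptr_data_dependency below) recognize the sequence
//   lui; addi; slli 11; addi; slli 6; addi/jalr/load
// used to materialize a wide (typically 48-bit) address constant. The sketch
// below only mirrors that sequence arithmetically; how movptr() actually
// splits a given target into these immediates (including addi sign-extension
// handling) is not shown in this hunk and is not assumed here. The sample
// values are arbitrary.
#include <cstdint>
#include <cstdio>

static int64_t movptr_compose(int64_t hi20, int64_t a1, int64_t a2, int64_t a3) {
  int64_t v = hi20 << 12;  // lui places its immediate at bit 12
  v += a1;                 // addi
  v <<= 11;                // slli 11 (the shift checked by is_slli_shift_at(.., 11))
  v += a2;                 // addi
  v <<= 6;                 // slli 6  (the shift checked by is_slli_shift_at(.., 6))
  v += a3;                 // final addi, or folded into a jalr/load offset
  return v;
}

int main() {
  std::printf("%#llx\n",
              (unsigned long long)movptr_compose(0x12345, 0x123, 0x456, 0x2a));
  return 0;
}
// -----------------------------------------------------------------------------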
+ -+ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); -+ void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0); -+ void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); ++class NativeCall; + -+ void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail); -+ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail); -+ void cmpxchg(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, bool result_as_bool = false); -+ void cmpxchg_weak(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result); -+ void cmpxchg_narrow_value_helper(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Register tmp1, Register tmp2, Register tmp3); -+ void cmpxchg_narrow_value(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, bool result_as_bool, -+ Register tmp1, Register tmp2, Register tmp3); -+ void weak_cmpxchg_narrow_value(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, -+ Register tmp1, Register tmp2, Register tmp3); ++class NativeInstruction { ++ friend class Relocation; ++ friend bool is_NativeCallTrampolineStub_at(address); ++ public: ++ enum { ++ instruction_size = 4, ++ compressed_instruction_size = 2, ++ }; + -+ void atomic_add(Register prev, RegisterOrConstant incr, Register addr); -+ void atomic_addw(Register prev, RegisterOrConstant incr, Register addr); -+ void atomic_addal(Register prev, RegisterOrConstant incr, Register addr); -+ void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr); ++ juint encoding() const { ++ return uint_at(0); ++ } + -+ void atomic_xchg(Register prev, Register newv, Register addr); -+ void atomic_xchgw(Register prev, Register newv, Register addr); -+ void atomic_xchgal(Register prev, Register newv, Register addr); -+ void atomic_xchgalw(Register prev, Register newv, Register addr); -+ void atomic_xchgwu(Register prev, Register newv, Register addr); -+ void atomic_xchgalwu(Register prev, Register newv, Register addr); ++ bool is_jal() const { return is_jal_at(addr_at(0)); } ++ bool is_movptr() const { return is_movptr_at(addr_at(0)); } ++ bool is_call() const { return is_call_at(addr_at(0)); } ++ bool is_jump() const { return is_jump_at(addr_at(0)); } + -+ static bool far_branches() { -+ return ReservedCodeCacheSize > branch_range; ++ static bool is_jal_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1101111; } ++ static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; } ++ static bool is_branch_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100011; } ++ static bool is_ld_at(address instr) { assert_cond(instr != NULL); return is_load_at(instr) && extract_funct3(instr) == 0b011; } ++ static bool is_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000011; } ++ static bool is_float_load_at(address instr) { assert_cond(instr != NULL); return 
extract_opcode(instr) == 0b0000111; } ++ static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010111; } ++ static bool is_jump_at(address instr) { assert_cond(instr != NULL); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); } ++ static bool is_addi_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; } ++ static bool is_addiw_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; } ++ static bool is_lui_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0110111; } ++ static bool is_slli_shift_at(address instr, uint32_t shift) { ++ assert_cond(instr != NULL); ++ return (extract_opcode(instr) == 0b0010011 && // opcode field ++ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation ++ Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field + } + -+ // Jumps that can reach anywhere in the code cache. -+ // Trashes tmp. -+ void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); -+ void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); ++ static Register extract_rs1(address instr); ++ static Register extract_rs2(address instr); ++ static Register extract_rd(address instr); ++ static uint32_t extract_opcode(address instr); ++ static uint32_t extract_funct3(address instr); + -+ static int far_branch_size() { -+ if (far_branches()) { -+ return 2 * 4; // auipc + jalr, see far_call() & far_jump() -+ } else { -+ return 4; -+ } ++ // the instruction sequence of movptr is as below: ++ // lui ++ // addi ++ // slli ++ // addi ++ // slli ++ // addi/jalr/load ++ static bool check_movptr_data_dependency(address instr) { ++ address lui = instr; ++ address addi1 = lui + instruction_size; ++ address slli1 = addi1 + instruction_size; ++ address addi2 = slli1 + instruction_size; ++ address slli2 = addi2 + instruction_size; ++ address last_instr = slli2 + instruction_size; ++ return extract_rs1(addi1) == extract_rd(lui) && ++ extract_rs1(addi1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(slli2) && ++ extract_rs1(last_instr) == extract_rd(slli2); + } + -+ void load_byte_map_base(Register reg); -+ -+ void bang_stack_with_offset(int offset) { -+ // stack grows down, caller passes positive offset -+ assert(offset > 0, "must bang with negative offset"); -+ sub(t0, sp, offset); -+ sd(zr, Address(t0)); ++ // the instruction sequence of li64 is as below: ++ // lui ++ // addi ++ // slli ++ // addi ++ // slli ++ // addi ++ // slli ++ // addi ++ static bool check_li64_data_dependency(address instr) { ++ address lui = instr; ++ address addi1 = lui + instruction_size; ++ address slli1 = addi1 + instruction_size; ++ address addi2 = slli1 + instruction_size; ++ address slli2 = addi2 + instruction_size; ++ address addi3 = slli2 + instruction_size; ++ address slli3 = addi3 + instruction_size; ++ address addi4 = slli3 + instruction_size; ++ return extract_rs1(addi1) == extract_rd(lui) && ++ extract_rs1(addi1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(slli1) && ++ 
extract_rs1(addi2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(slli2) && ++ extract_rs1(addi3) == extract_rd(slli2) && ++ extract_rs1(addi3) == extract_rd(addi3) && ++ extract_rs1(slli3) == extract_rd(addi3) && ++ extract_rs1(slli3) == extract_rd(slli3) && ++ extract_rs1(addi4) == extract_rd(slli3) && ++ extract_rs1(addi4) == extract_rd(addi4); + } + -+ void la_patchable(Register reg1, const Address &dest, int32_t &offset); ++ // the instruction sequence of li32 is as below: ++ // lui ++ // addiw ++ static bool check_li32_data_dependency(address instr) { ++ address lui = instr; ++ address addiw = lui + instruction_size; + -+ virtual void _call_Unimplemented(address call_site) { -+ mv(t1, call_site); ++ return extract_rs1(addiw) == extract_rd(lui) && ++ extract_rs1(addiw) == extract_rd(addiw); + } + -+ #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) ++ // the instruction sequence of pc-relative is as below: ++ // auipc ++ // jalr/addi/load/float_load ++ static bool check_pc_relative_data_dependency(address instr) { ++ address auipc = instr; ++ address last_instr = auipc + instruction_size; + -+ // Frame creation and destruction shared between JITs. -+ void build_frame(int framesize); -+ void remove_frame(int framesize); ++ return extract_rs1(last_instr) == extract_rd(auipc); ++ } + -+ void reserved_stack_check(); ++ // the instruction sequence of load_label is as below: ++ // auipc ++ // load ++ static bool check_load_pc_relative_data_dependency(address instr) { ++ address auipc = instr; ++ address load = auipc + instruction_size; + -+ void get_polling_page(Register dest, relocInfo::relocType rtype); -+ address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); ++ return extract_rd(load) == extract_rd(auipc) && ++ extract_rs1(load) == extract_rd(load); ++ } + -+ address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); -+ address ic_call(address entry, jint method_index = 0); ++ static bool is_movptr_at(address instr); ++ static bool is_li32_at(address instr); ++ static bool is_li64_at(address instr); ++ static bool is_pc_relative_at(address branch); ++ static bool is_load_pc_relative_at(address branch); + -+ void add_memory_int64(const Address dst, int64_t imm); -+ void add_memory_int32(const Address dst, int32_t imm); ++ static bool is_call_at(address instr) { ++ if (is_jal_at(instr) || is_jalr_at(instr)) { ++ return true; ++ } ++ return false; ++ } ++ static bool is_lwu_to_zr(address instr); + -+ void cmpptr(Register src1, Address src2, Label& equal); ++ inline bool is_nop(); ++ inline bool is_jump_or_nop(); ++ bool is_safepoint_poll(); ++ bool is_sigill_zombie_not_entrant(); ++ bool is_stop(); + -+ void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); -+ void load_method_holder_cld(Register result, Register method); -+ void load_method_holder(Register holder, Register method); ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } + -+ void compute_index(Register str1, Register trailing_zeros, Register match_mask, -+ Register result, Register char_tmp, Register tmp, -+ bool haystack_isL); -+ void compute_match_mask(Register src, Register pattern, Register match_mask, -+ Register mask1, Register mask2); ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } + -+#ifdef COMPILER2 -+ void 
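// --- Editorial sketch, not part of this patch -------------------------------
// check_pc_relative_data_dependency / check_load_pc_relative_data_dependency
// above recognize an auipc paired with a dependent addi/jalr/load. The address
// such a pair refers to is pc + sign_extend(imm20 << 12) + sign_extend(imm12).
// A standalone illustration of that arithmetic; sext() is a local helper, not
// a HotSpot API, and the sample values are arbitrary.
#include <cstdint>
#include <cassert>

static int64_t sext(uint64_t x, int bits) {
  // Sign-extend the low `bits` bits of x.
  return (int64_t)(x << (64 - bits)) >> (64 - bits);
}

static int64_t pc_relative_target(int64_t pc, uint32_t imm20, uint32_t imm12) {
  return pc + sext((uint64_t)imm20 << 12, 32)  // auipc contribution
            + sext(imm12, 12);                 // addi/load/jalr offset
}

int main() {
  // imm20 = 1 (one 4 KiB page up), imm12 = 0xff8 encodes -8.
  assert(pc_relative_target(0x4000, 1, 0xff8) == 0x4000 + 0x1000 - 8);
  return 0;
}
// -----------------------------------------------------------------------------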
mul_add(Register out, Register in, Register offset, -+ Register len, Register k, Register tmp); -+ void cad(Register dst, Register src1, Register src2, Register carry); -+ void cadc(Register dst, Register src1, Register src2, Register carry); -+ void adc(Register dst, Register src1, Register src2, Register carry); -+ void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, -+ Register src1, Register src2, Register carry); -+ void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx); -+ void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx); -+ void multiply_128_x_128_loop(Register y, Register z, -+ Register carry, Register carry2, -+ Register idx, Register jdx, -+ Register yz_idx1, Register yz_idx2, -+ Register tmp, Register tmp3, Register tmp4, -+ Register tmp6, Register product_hi); -+ void multiply_to_len(Register x, Register xlen, Register y, Register ylen, -+ Register z, Register zlen, -+ Register tmp1, Register tmp2, Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, Register product_hi); -+#endif ++ address ptr_at(int offset) const { return *(address*) addr_at(offset); } + -+ void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); -+ void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } + -+ void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1); + -+ void zero_words(Register base, u_int64_t cnt); -+ address zero_words(Register ptr, Register cnt); -+ void fill_words(Register base, Register cnt, Register value); -+ void zero_memory(Register addr, Register len, Register tmp); ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; } ++ void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; } ++ void set_ptr_at (int offset, address ptr) { *(address*) addr_at(offset) = ptr; } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; } + -+ // shift left by shamt and add -+ void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt); ++ public: + -+ // Here the float instructions with safe deal with some exceptions. -+ // e.g. convert from NaN, +Inf, -Inf to int, float, double -+ // will trigger exception, we need to deal with these situations -+ // to get correct results. 
-+ void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0); -+ void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0); -+ void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0); -+ void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0); ++ inline friend NativeInstruction* nativeInstruction_at(address addr); + -+ // vector load/store unit-stride instructions -+ void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { -+ switch (sew) { -+ case Assembler::e64: -+ vle64_v(vd, base, vm); -+ break; -+ case Assembler::e32: -+ vle32_v(vd, base, vm); -+ break; -+ case Assembler::e16: -+ vle16_v(vd, base, vm); -+ break; -+ case Assembler::e8: // fall through -+ default: -+ vle8_v(vd, base, vm); -+ break; -+ } ++ static bool maybe_cpool_ref(address instr) { ++ return is_auipc_at(instr); + } + -+ void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { -+ switch (sew) { -+ case Assembler::e64: -+ vse64_v(store_data, base, vm); -+ break; -+ case Assembler::e32: -+ vse32_v(store_data, base, vm); -+ break; -+ case Assembler::e16: -+ vse16_v(store_data, base, vm); -+ break; -+ case Assembler::e8: // fall through -+ default: -+ vse8_v(store_data, base, vm); -+ break; -+ } ++ bool is_membar() { ++ return (uint_at(0) & 0x7f) == 0b1111 && extract_funct3(addr_at(0)) == 0; + } ++}; + -+ static const int zero_words_block_size; -+ -+ void cast_primitive_type(BasicType type, Register Rt) { -+ switch (type) { -+ case T_BOOLEAN: -+ sltu(Rt, zr, Rt); -+ break; -+ case T_CHAR : -+ zero_extend(Rt, Rt, 16); -+ break; -+ case T_BYTE : -+ sign_extend(Rt, Rt, 8); -+ break; -+ case T_SHORT : -+ sign_extend(Rt, Rt, 16); -+ break; -+ case T_INT : -+ addw(Rt, Rt, zr); -+ break; -+ case T_LONG : /* nothing to do */ break; -+ case T_VOID : /* nothing to do */ break; -+ case T_FLOAT : /* nothing to do */ break; -+ case T_DOUBLE : /* nothing to do */ break; -+ default: ShouldNotReachHere(); -+ } -+ } ++inline NativeInstruction* nativeInstruction_at(address addr) { ++ return (NativeInstruction*)addr; ++} + -+ // float cmp with unordered_result -+ void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); -+ void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); ++// The natural type of an RISCV instruction is uint32_t ++inline NativeInstruction* nativeInstruction_at(uint32_t *addr) { ++ return (NativeInstruction*)addr; ++} + -+ // Zero/Sign-extend -+ void zero_extend(Register dst, Register src, int bits); -+ void sign_extend(Register dst, Register src, int bits); ++inline NativeCall* nativeCall_at(address addr); ++// The NativeCall is an abstraction for accessing/manipulating native ++// call instructions (used to manipulate inline caches, primitive & ++// DSO calls, etc.). + -+ // compare src1 and src2 and get -1/0/1 in dst. 
-+ // if [src1 > src2], dst = 1; -+ // if [src1 == src2], dst = 0; -+ // if [src1 < src2], dst = -1; -+ void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0); ++class NativeCall: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ instruction_size = 4, ++ instruction_offset = 0, ++ displacement_offset = 0, ++ return_address_offset = 4 ++ }; + -+ int push_fp(unsigned int bitset, Register stack); -+ int pop_fp(unsigned int bitset, Register stack); ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { return addr_at(return_address_offset); } ++ address return_address() const { return addr_at(return_address_offset); } ++ address destination() const; + -+ int push_vp(unsigned int bitset, Register stack); -+ int pop_vp(unsigned int bitset, Register stack); ++ void set_destination(address dest) { ++ assert(is_jal(), "Should be jal instruction!"); ++ intptr_t offset = (intptr_t)(dest - instruction_address()); ++ assert((offset & 0x1) == 0, "bad alignment"); ++ assert(is_imm_in_range(offset, 20, 1), "encoding constraint"); ++ unsigned int insn = 0b1101111; // jal ++ address pInsn = (address)(&insn); ++ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); ++ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); ++ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); ++ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); ++ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra ++ set_int_at(displacement_offset, insn); ++ } + -+ // vext -+ void vmnot_m(VectorRegister vd, VectorRegister vs); -+ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); -+ void vfneg_v(VectorRegister vd, VectorRegister vs); ++ void verify_alignment() {} // do nothing on riscv ++ void verify(); ++ void print(); + -+private: ++ // Creation ++ inline friend NativeCall* nativeCall_at(address addr); ++ inline friend NativeCall* nativeCall_before(address return_address); + -+#ifdef ASSERT -+ // Template short-hand support to clean-up after a failed call to trampoline -+ // call generation (see trampoline_call() below), when a set of Labels must -+ // be reset (before returning). -+ template -+ void reset_labels(Label& lbl, More&... more) { -+ lbl.reset(); reset_labels(more...); -+ } -+ template -+ void reset_labels(Label& lbl) { -+ lbl.reset(); ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - NativeCall::return_address_offset); + } -+#endif -+ void repne_scan(Register addr, Register value, Register count, Register tmp); + -+ // Return true if an address is within the 48-bit RISCV64 address space. -+ bool is_valid_riscv64_address(address addr) { -+ return ((uintptr_t)addr >> 48) == 0; -+ } ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); + -+ void ld_constant(Register dest, const Address &const_addr) { -+ if (NearCpool) { -+ ld(dest, const_addr); -+ } else { -+ int32_t offset = 0; -+ la_patchable(dest, InternalAddress(const_addr.target()), offset); -+ ld(dest, Address(dest, offset)); -+ } -+ } ++ static void replace_mt_safe(address instr_addr, address code_buffer); + -+ int bitset_to_regs(unsigned int bitset, unsigned char* regs); -+ Address add_memory_helper(const Address dst); ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. 
If the call is an immediate BL ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. + -+ void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); -+ void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) ++ ++ // The parameter assert_lock disables the assertion during code generation. ++ void set_destination_mt_safe(address dest, bool assert_lock = true); + -+ // Check the current thread doesn't need a cross modify fence. -+ void verify_cross_modify_fence_not_required() PRODUCT_RETURN; ++ address get_trampoline(); +}; + -+#ifdef ASSERT -+inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } -+#endif ++inline NativeCall* nativeCall_at(address addr) { ++ assert_cond(addr != NULL); ++ NativeCall* call = (NativeCall*)(addr - NativeCall::instruction_offset); ++ DEBUG_ONLY(call->verify()); ++ return call; ++} + -+/** -+ * class SkipIfEqual: -+ * -+ * Instantiating this class will result in assembly code being output that will -+ * jump around any code emitted between the creation of the instance and it's -+ * automatic destruction at the end of a scope block, depending on the value of -+ * the flag passed to the constructor, which will be checked at run-time. -+ */ -+class SkipIfEqual { -+ private: -+ MacroAssembler* _masm; -+ Label _label; ++inline NativeCall* nativeCall_before(address return_address) { ++ assert_cond(return_address != NULL); ++ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); ++ DEBUG_ONLY(call->verify()); ++ return call; ++} + ++// An interface for accessing/manipulating native mov reg, imm instructions. ++// (used to manipulate inlined 64-bit data calls, etc.) ++class NativeMovConstReg: public NativeInstruction { + public: -+ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); -+ ~SkipIfEqual(); ++ enum RISCV_specific_constants { ++ movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). ++ movptr_with_offset_instruction_size = 5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset(). ++ load_pc_relative_instruction_size = 2 * NativeInstruction::instruction_size, // auipc, ld ++ instruction_offset = 0, ++ displacement_offset = 0 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { ++ // if the instruction at 5 * instruction_size is addi, ++ // it means a lui + addi + slli + addi + slli + addi instruction sequence, ++ // and the next instruction address should be addr_at(6 * instruction_size). 
++ // However, when the instruction at 5 * instruction_size isn't addi, ++ // the next instruction address should be addr_at(5 * instruction_size) ++ if (nativeInstruction_at(instruction_address())->is_movptr()) { ++ if (is_addi_at(addr_at(movptr_with_offset_instruction_size))) { ++ // Assume: lui, addi, slli, addi, slli, addi ++ return addr_at(movptr_instruction_size); ++ } else { ++ // Assume: lui, addi, slli, addi, slli ++ return addr_at(movptr_with_offset_instruction_size); ++ } ++ } else if (is_load_pc_relative_at(instruction_address())) { ++ // Assume: auipc, ld ++ return addr_at(load_pc_relative_instruction_size); ++ } ++ guarantee(false, "Unknown instruction in NativeMovConstReg"); ++ return NULL; ++ } ++ ++ intptr_t data() const; ++ void set_data(intptr_t x); ++ ++ void flush() { ++ if (!maybe_cpool_ref(instruction_address())) { ++ ICache::invalidate_range(instruction_address(), movptr_instruction_size); ++ } ++ } ++ ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address addr); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address addr); +}; + -+#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp -new file mode 100644 -index 00000000000..ef968ccd96d ---- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp -@@ -0,0 +1,31 @@ -+/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++inline NativeMovConstReg* nativeMovConstReg_at(address addr) { ++ assert_cond(addr != NULL); ++ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_offset); ++ DEBUG_ONLY(test->verify()); ++ return test; ++} + -+#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP -+#define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP ++inline NativeMovConstReg* nativeMovConstReg_before(address addr) { ++ assert_cond(addr != NULL); ++ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++ DEBUG_ONLY(test->verify()); ++ return test; ++} + -+// Still empty. ++// RISCV should not use C1 runtime patching, but still implement ++// NativeMovRegMem to keep some compilers happy. 
++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ instruction_size = NativeInstruction::instruction_size, ++ instruction_offset = 0, ++ data_offset = 0, ++ next_instruction_offset = NativeInstruction::instruction_size ++ }; + -+#endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP -diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp -new file mode 100644 -index 00000000000..23a75d20502 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp -@@ -0,0 +1,169 @@ -+/* -+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ int instruction_start() const { return instruction_offset; } + -+#ifndef CPU_RISCV_MATCHER_RISCV_HPP -+#define CPU_RISCV_MATCHER_RISCV_HPP ++ address instruction_address() const { return addr_at(instruction_offset); } + -+ // Defined within class Matcher ++ int num_bytes_to_end_of_patch() const { return instruction_offset + instruction_size; } + -+ // false => size gets scaled to BytesPerLong, ok. -+ static const bool init_array_count_is_in_bytes = false; ++ int offset() const; + -+ // Whether this platform implements the scalable vector feature -+ static const bool implements_scalable_vector = true; ++ void set_offset(int x); + -+ static const bool supports_scalable_vector() { -+ return UseRVV; ++ void add_offset_in_bytes(int add_offset) { ++ set_offset(offset() + add_offset); + } + -+ // riscv supports misaligned vectors store/load. -+ static constexpr bool misaligned_vectors_ok() { -+ return true; -+ } ++ void verify(); ++ void print(); + -+ // Whether code generation need accurate ConvI2L types. 
-+ static const bool convi2l_type_required = false; ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at(address addr); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at(address addr) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(addr - NativeMovRegMem::instruction_offset); ++ DEBUG_ONLY(test->verify()); ++ return test; ++} ++ ++class NativeJump: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ instruction_size = NativeInstruction::instruction_size, ++ instruction_offset = 0, ++ data_offset = 0, ++ next_instruction_offset = NativeInstruction::instruction_size ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { return addr_at(instruction_size); } ++ address jump_destination() const; ++ void set_jump_destination(address dest); + -+ // Does the CPU require late expand (see block.cpp for description of late expand)? -+ static const bool require_postalloc_expand = false; ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); + -+ // Do we need to mask the count passed to shift instructions or does -+ // the cpu only look at the lower 5/6 bits anyway? -+ static const bool need_masked_shift_count = false; ++ void verify(); + -+ // No support for generic vector operands. -+ static const bool supports_generic_vector_operands = false; ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry); ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry); ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++}; + -+ static constexpr bool isSimpleConstant64(jlong value) { -+ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. -+ // Probably always true, even if a temp register is required. -+ return true; -+ } ++inline NativeJump* nativeJump_at(address addr) { ++ NativeJump* jump = (NativeJump*)(addr - NativeJump::instruction_offset); ++ DEBUG_ONLY(jump->verify()); ++ return jump; ++} + -+ // Use conditional move (CMOVL) -+ static constexpr int long_cmove_cost() { -+ // long cmoves are no more expensive than int cmoves -+ return 0; -+ } ++class NativeGeneralJump: public NativeJump { ++public: ++ enum RISCV_specific_constants { ++ instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr ++ instruction_offset = 0, ++ data_offset = 0, ++ next_instruction_offset = 6 * NativeInstruction::instruction_size // lui, addi, slli, addi, slli, jalr ++ }; + -+ static constexpr int float_cmove_cost() { -+ // float cmoves are no more expensive than int cmoves -+ return 0; -+ } ++ address jump_destination() const; + -+ // This affects two different things: -+ // - how Decode nodes are matched -+ // - how ImplicitNullCheck opportunities are recognized -+ // If true, the matcher will try to remove all Decodes and match them -+ // (as operands) into nodes. NullChecks are not prepared to deal with -+ // Decodes by final_graph_reshaping(). -+ // If false, final_graph_reshaping() forces the decode behind the Cmp -+ // for a NullCheck. The matcher matches the Decode node into a register. -+ // Implicit_null_check optimization moves the Decode along with the -+ // memory operation back up before the NullCheck. 
-+ static bool narrow_oop_use_complex_address() { -+ return CompressedOops::shift() == 0; -+ } ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; + -+ static bool narrow_klass_use_complex_address() { -+ return false; -+ } ++inline NativeGeneralJump* nativeGeneralJump_at(address addr) { ++ assert_cond(addr != NULL); ++ NativeGeneralJump* jump = (NativeGeneralJump*)(addr); ++ debug_only(jump->verify();) ++ return jump; ++} + -+ static bool const_oop_prefer_decode() { -+ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. -+ return CompressedOops::base() == NULL; -+ } ++class NativeIllegalInstruction: public NativeInstruction { ++ public: ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; + -+ static bool const_klass_prefer_decode() { -+ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. -+ return CompressedKlassPointers::base() == NULL; -+ } ++inline bool NativeInstruction::is_nop() { ++ uint32_t insn = *(uint32_t*)addr_at(0); ++ return insn == 0x13; ++} + -+ // Is it better to copy float constants, or load them directly from -+ // memory? Intel can load a float constant from a direct address, -+ // requiring no extra registers. Most RISCs will have to materialize -+ // an address into a register first, so they would do better to copy -+ // the constant from stack. -+ static const bool rematerialize_float_constants = false; ++inline bool NativeInstruction::is_jump_or_nop() { ++ return is_nop() || is_jump(); ++} + -+ // If CPU can load and store mis-aligned doubles directly then no -+ // fixup is needed. Else we split the double into 2 integer pieces -+ // and move it piece-by-piece. Only happens when passing doubles into -+ // C code as the Java calling convention forces doubles to be aligned. -+ static const bool misaligned_doubles_ok = true; ++// Call trampoline stubs. ++class NativeCallTrampolineStub : public NativeInstruction { ++ public: + -+ // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. -+ static const bool strict_fp_requires_explicit_rounding = false; ++ enum RISCV_specific_constants { ++ // Refer to function emit_trampoline_stub. ++ instruction_size = 3 * NativeInstruction::instruction_size + wordSize, // auipc + ld + jr + target address ++ data_offset = 3 * NativeInstruction::instruction_size, // auipc + ld + jr ++ }; + -+ // Are floats converted to double when stored to stack during -+ // deoptimization? -+ static constexpr bool float_in_double() { return false; } ++ address destination(nmethod *nm = NULL) const; ++ void set_destination(address new_destination); ++ ptrdiff_t destination_offset() const; ++}; + -+ // Do ints take an entire long register or just half? -+ // The relevant question is how the int is callee-saved: -+ // the whole long is written but de-opt'ing will have to extract -+ // the relevant 32 bits. -+ static const bool int_in_long = true; ++inline bool is_NativeCallTrampolineStub_at(address addr) { ++ // Ensure that the stub is exactly ++ // ld t0, L--->auipc + ld ++ // jr t0 ++ // L: + -+ // Does the CPU supports vector variable shift instructions? -+ static constexpr bool supports_vector_variable_shifts(void) { -+ return false; ++ // judge inst + register + imm ++ // 1). check the instructions: auipc + ld + jalr ++ // 2). check if auipc[11:7] == t0 and ld[11:7] == t0 and ld[19:15] == t0 && jr[19:15] == t0 ++ // 3). 
check if the offset in ld[31:20] equals the data_offset ++ assert_cond(addr != NULL); ++ const int instr_size = NativeInstruction::instruction_size; ++ if (NativeInstruction::is_auipc_at(addr) && ++ NativeInstruction::is_ld_at(addr + instr_size) && ++ NativeInstruction::is_jalr_at(addr + 2 * instr_size) && ++ (NativeInstruction::extract_rd(addr) == x5) && ++ (NativeInstruction::extract_rd(addr + instr_size) == x5) && ++ (NativeInstruction::extract_rs1(addr + instr_size) == x5) && ++ (NativeInstruction::extract_rs1(addr + 2 * instr_size) == x5) && ++ (Assembler::extract(((unsigned*)addr)[1], 31, 20) == NativeCallTrampolineStub::data_offset)) { ++ return true; + } ++ return false; ++} + -+ // Does the CPU supports vector variable rotate instructions? -+ static constexpr bool supports_vector_variable_rotates(void) { -+ return false; -+ } ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ assert_cond(addr != NULL); ++ assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found"); ++ return (NativeCallTrampolineStub*)addr; ++} + -+ // Does the CPU supports vector constant rotate instructions? -+ static constexpr bool supports_vector_constant_rotates(int shift) { -+ return false; -+ } ++class NativeMembar : public NativeInstruction { ++public: ++ uint32_t get_kind(); ++ void set_kind(uint32_t order_kind); ++}; + -+ // Does the CPU supports vector unsigned comparison instructions? -+ static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { -+ return false; -+ } ++inline NativeMembar *NativeMembar_at(address addr) { ++ assert_cond(addr != NULL); ++ assert(nativeInstruction_at(addr)->is_membar(), "no membar found"); ++ return (NativeMembar*)addr; ++} + -+ // Some microarchitectures have mask registers used on vectors -+ static const bool has_predicated_vectors(void) { -+ return false; -+ } ++#endif // CPU_RISCV_NATIVEINST_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp +new file mode 100644 +index 0000000000..fef8ca9b64 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ // true means we have fast l2f convers -+ // false means that conversion is done by runtime call -+ static constexpr bool convL2FSupported(void) { -+ return true; -+ } ++#ifndef CPU_RISCV_REGISTERMAP_RISCV_HPP ++#define CPU_RISCV_REGISTERMAP_RISCV_HPP + -+ // Implements a variant of EncodeISOArrayNode that encode ASCII only -+ static const bool supports_encode_ascii_array = false; ++// machine-dependent implemention for register maps ++ friend class frame; + -+ // Returns pre-selection estimated size of a vector operation. -+ static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) { -+ return 0; -+ } ++ private: ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ address pd_location(VMReg reg) const { return NULL; } + -+#endif // CPU_RISCV_MATCHER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_RISCV_REGISTERMAP_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp new file mode 100644 -index 00000000000..1f7c0c87c21 +index 0000000000..583f67573c --- /dev/null -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -0,0 +1,461 @@ ++++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp +@@ -0,0 +1,192 @@ +/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
@@ -27648,449 +26846,180 @@ index 00000000000..1f7c0c87c21 + */ + +#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "classfile/javaClasses.inline.hpp" -+#include "classfile/vmClasses.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "memory/allocation.inline.hpp" -+#include "prims/jvmtiExport.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/flags/flagSetting.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/stubRoutines.hpp" -+ -+#define __ _masm-> -+ -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#else -+#define BLOCK_COMMENT(str) __ block_comment(str) -+#endif ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "interp_masm_riscv.hpp" ++#include "register_riscv.hpp" + -+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++REGISTER_DEFINITION(Register, noreg); + -+void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { -+ assert_cond(_masm != NULL); -+ if (VerifyMethodHandles) { -+ verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), -+ "MH argument is a Class"); -+ } -+ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset())); -+} ++REGISTER_DEFINITION(Register, x0); ++REGISTER_DEFINITION(Register, x1); ++REGISTER_DEFINITION(Register, x2); ++REGISTER_DEFINITION(Register, x3); ++REGISTER_DEFINITION(Register, x4); ++REGISTER_DEFINITION(Register, x5); ++REGISTER_DEFINITION(Register, x6); ++REGISTER_DEFINITION(Register, x7); ++REGISTER_DEFINITION(Register, x8); ++REGISTER_DEFINITION(Register, x9); ++REGISTER_DEFINITION(Register, x10); ++REGISTER_DEFINITION(Register, x11); ++REGISTER_DEFINITION(Register, x12); ++REGISTER_DEFINITION(Register, x13); ++REGISTER_DEFINITION(Register, x14); ++REGISTER_DEFINITION(Register, x15); ++REGISTER_DEFINITION(Register, x16); ++REGISTER_DEFINITION(Register, x17); ++REGISTER_DEFINITION(Register, x18); ++REGISTER_DEFINITION(Register, x19); ++REGISTER_DEFINITION(Register, x20); ++REGISTER_DEFINITION(Register, x21); ++REGISTER_DEFINITION(Register, x22); ++REGISTER_DEFINITION(Register, x23); ++REGISTER_DEFINITION(Register, x24); ++REGISTER_DEFINITION(Register, x25); ++REGISTER_DEFINITION(Register, x26); ++REGISTER_DEFINITION(Register, x27); ++REGISTER_DEFINITION(Register, x28); ++REGISTER_DEFINITION(Register, x29); ++REGISTER_DEFINITION(Register, x30); ++REGISTER_DEFINITION(Register, x31); + -+#ifdef ASSERT -+static int check_nonzero(const char* xname, int x) { -+ assert(x != 0, "%s should be nonzero", xname); -+ return x; -+} -+#define NONZERO(x) check_nonzero(#x, x) -+#else //ASSERT -+#define NONZERO(x) (x) -+#endif //PRODUCT ++REGISTER_DEFINITION(FloatRegister, fnoreg); + -+#ifdef ASSERT -+void MethodHandles::verify_klass(MacroAssembler* _masm, -+ Register obj, vmClassID klass_id, -+ const char* error_message) { -+ assert_cond(_masm != NULL); -+ InstanceKlass** klass_addr = vmClasses::klass_addr_at(klass_id); -+ Klass* klass = vmClasses::klass_at(klass_id); -+ Register temp = t1; -+ Register temp2 = t0; // used by MacroAssembler::cmpptr -+ Label L_ok, L_bad; -+ BLOCK_COMMENT("verify_klass {"); -+ __ verify_oop(obj); -+ __ beqz(obj, L_bad); -+ __ push_reg(RegSet::of(temp, temp2), sp); -+ __ load_klass(temp, obj); -+ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); -+ intptr_t super_check_offset = klass->super_check_offset(); -+ __ ld(temp, Address(temp, super_check_offset)); -+ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); -+ __ 
pop_reg(RegSet::of(temp, temp2), sp); -+ __ bind(L_bad); -+ __ stop(error_message); -+ __ BIND(L_ok); -+ __ pop_reg(RegSet::of(temp, temp2), sp); -+ BLOCK_COMMENT("} verify_klass"); -+} -+ -+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {} -+ -+#endif //ASSERT -+ -+void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, -+ bool for_compiler_entry) { -+ assert_cond(_masm != NULL); -+ assert(method == xmethod, "interpreter calling convention"); -+ Label L_no_such_method; -+ __ beqz(xmethod, L_no_such_method); -+ __ verify_method_ptr(method); -+ -+ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { -+ Label run_compiled_code; -+ // JVMTI events, such as single-stepping, are implemented partly by avoiding running -+ // compiled code in threads for which the event is enabled. Check here for -+ // interp_only_mode if these events CAN be enabled. -+ -+ __ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); -+ __ beqz(t0, run_compiled_code); -+ __ ld(t0, Address(method, Method::interpreter_entry_offset())); -+ __ jr(t0); -+ __ BIND(run_compiled_code); -+ } -+ -+ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : -+ Method::from_interpreted_offset(); -+ __ ld(t0,Address(method, entry_offset)); -+ __ jr(t0); -+ __ bind(L_no_such_method); -+ __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry())); -+} -+ -+void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, -+ Register recv, Register method_temp, -+ Register temp2, -+ bool for_compiler_entry) { -+ assert_cond(_masm != NULL); -+ BLOCK_COMMENT("jump_to_lambda_form {"); -+ // This is the initial entry point of a lazy method handle. -+ // After type checking, it picks up the invoker from the LambdaForm. 
-+ assert_different_registers(recv, method_temp, temp2); -+ assert(recv != noreg, "required register"); -+ assert(method_temp == xmethod, "required register for loading method"); -+ -+ // Load the invoker, as MH -> MH.form -> LF.vmentry -+ __ verify_oop(recv); -+ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2); -+ __ verify_oop(method_temp); -+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2); -+ __ verify_oop(method_temp); -+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset())), temp2); -+ __ verify_oop(method_temp); -+ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), noreg, noreg); -+ -+ if (VerifyMethodHandles && !for_compiler_entry) { -+ // make sure recv is already on stack -+ __ ld(temp2, Address(method_temp, Method::const_offset())); -+ __ load_sized_value(temp2, -+ Address(temp2, ConstMethod::size_of_parameters_offset()), -+ sizeof(u2), /*is_signed*/ false); -+ Label L; -+ __ ld(t0, __ argument_address(temp2, -1)); -+ __ beq(recv, t0, L); -+ __ ld(x10, __ argument_address(temp2, -1)); -+ __ ebreak(); -+ __ BIND(L); -+ } -+ -+ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); -+ BLOCK_COMMENT("} jump_to_lambda_form"); -+} -+ -+// Code generation -+address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, -+ vmIntrinsics::ID iid) { -+ assert_cond(_masm != NULL); -+ const bool not_for_compiler_entry = false; // this is the interpreter entry -+ assert(is_signature_polymorphic(iid), "expected invoke iid"); -+ if (iid == vmIntrinsics::_invokeGeneric || -+ iid == vmIntrinsics::_compiledLambdaForm) { -+ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. -+ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. -+ // They all allow an appendix argument. -+ __ ebreak(); // empty stubs make SG sick -+ return NULL; -+ } -+ -+ // No need in interpreter entry for linkToNative for now. -+ // Interpreter calls compiled entry through i2c. -+ if (iid == vmIntrinsics::_linkToNative) { -+ __ ebreak(); -+ return NULL; -+ } -+ -+ // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) -+ // xmethod: Method* -+ // x13: argument locator (parameter slot count, added to sp) -+ // x11: used as temp to hold mh or receiver -+ // x10, x29: garbage temps, blown away -+ Register argp = x13; // argument list ptr, live on error paths -+ Register mh = x11; // MH receiver; dies quickly and is recycled -+ -+ // here's where control starts out: -+ __ align(CodeEntryAlignment); -+ address entry_point = __ pc(); -+ -+ if (VerifyMethodHandles) { -+ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); -+ -+ Label L; -+ BLOCK_COMMENT("verify_intrinsic_id {"); -+ __ lhu(t0, Address(xmethod, Method::intrinsic_id_offset_in_bytes())); -+ __ mv(t1, (int) iid); -+ __ beq(t0, t1, L); -+ if (iid == vmIntrinsics::_linkToVirtual || -+ iid == vmIntrinsics::_linkToSpecial) { -+ // could do this for all kinds, but would explode assembly code size -+ trace_method_handle(_masm, "bad Method*::intrinsic_id"); -+ } -+ __ ebreak(); -+ __ bind(L); -+ BLOCK_COMMENT("} verify_intrinsic_id"); -+ } -+ -+ // First task: Find out how big the argument list is. 
-+ Address x13_first_arg_addr; -+ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); -+ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); -+ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { -+ __ ld(argp, Address(xmethod, Method::const_offset())); -+ __ load_sized_value(argp, -+ Address(argp, ConstMethod::size_of_parameters_offset()), -+ sizeof(u2), /*is_signed*/ false); -+ x13_first_arg_addr = __ argument_address(argp, -1); -+ } else { -+ DEBUG_ONLY(argp = noreg); -+ } -+ -+ if (!is_signature_polymorphic_static(iid)) { -+ __ ld(mh, x13_first_arg_addr); -+ DEBUG_ONLY(argp = noreg); -+ } -+ -+ // x13_first_arg_addr is live! -+ -+ trace_method_handle_interpreter_entry(_masm, iid); -+ if (iid == vmIntrinsics::_invokeBasic) { -+ generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); -+ } else { -+ // Adjust argument list by popping the trailing MemberName argument. -+ Register recv = noreg; -+ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { -+ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. -+ __ ld(recv = x12, x13_first_arg_addr); -+ } -+ DEBUG_ONLY(argp = noreg); -+ Register xmember = xmethod; // MemberName ptr; incoming method ptr is dead now -+ __ pop_reg(xmember); // extract last argument -+ generate_method_handle_dispatch(_masm, iid, recv, xmember, not_for_compiler_entry); -+ } -+ -+ return entry_point; -+} -+ -+ -+void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, -+ vmIntrinsics::ID iid, -+ Register receiver_reg, -+ Register member_reg, -+ bool for_compiler_entry) { -+ assert_cond(_masm != NULL); -+ assert(is_signature_polymorphic(iid), "expected invoke iid"); -+ // temps used in this code are not used in *either* compiled or interpreted calling sequences -+ Register temp1 = x7; -+ Register temp2 = x28; -+ Register temp3 = x29; // x30 is live by this point: it contains the sender SP -+ if (for_compiler_entry) { -+ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); -+ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); -+ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); -+ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); -+ } -+ -+ assert_different_registers(temp1, temp2, temp3, receiver_reg); -+ assert_different_registers(temp1, temp2, temp3, member_reg); -+ -+ if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { -+ if (iid == vmIntrinsics::_linkToNative) { -+ assert(for_compiler_entry, "only compiler entry is supported"); -+ } -+ // indirect through MH.form.vmentry.vmtarget -+ jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); -+ } else { -+ // The method is a member invoker used by direct method handles. 
-+ if (VerifyMethodHandles) { -+ // make sure the trailing argument really is a MemberName (caller responsibility) -+ verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName), -+ "MemberName required for invokeVirtual etc."); -+ } -+ -+ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); -+ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); -+ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); -+ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); -+ -+ Register temp1_recv_klass = temp1; -+ if (iid != vmIntrinsics::_linkToStatic) { -+ __ verify_oop(receiver_reg); -+ if (iid == vmIntrinsics::_linkToSpecial) { -+ // Don't actually load the klass; just null-check the receiver. -+ __ null_check(receiver_reg); -+ } else { -+ // load receiver klass itself -+ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); -+ __ load_klass(temp1_recv_klass, receiver_reg); -+ __ verify_klass_ptr(temp1_recv_klass); -+ } -+ BLOCK_COMMENT("check_receiver {"); -+ // The receiver for the MemberName must be in receiver_reg. -+ // Check the receiver against the MemberName.clazz -+ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { -+ // Did not load it above... -+ __ load_klass(temp1_recv_klass, receiver_reg); -+ __ verify_klass_ptr(temp1_recv_klass); -+ } -+ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { -+ Label L_ok; -+ Register temp2_defc = temp2; -+ __ load_heap_oop(temp2_defc, member_clazz, temp3); -+ load_klass_from_Class(_masm, temp2_defc); -+ __ verify_klass_ptr(temp2_defc); -+ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); -+ // If we get here, the type check failed! -+ __ ebreak(); -+ __ bind(L_ok); -+ } -+ BLOCK_COMMENT("} check_receiver"); -+ } -+ if (iid == vmIntrinsics::_linkToSpecial || -+ iid == vmIntrinsics::_linkToStatic) { -+ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass -+ } -+ -+ // Live registers at this point: -+ // member_reg - MemberName that was the trailing argument -+ // temp1_recv_klass - klass of stacked receiver, if needed -+ // x30 - interpreter linkage (if interpreted) -+ // x11 ... 
x10 - compiler arguments (if compiled) -+ -+ Label L_incompatible_class_change_error; -+ switch (iid) { -+ case vmIntrinsics::_linkToSpecial: -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); -+ } -+ __ load_heap_oop(xmethod, member_vmtarget); -+ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); -+ break; -+ -+ case vmIntrinsics::_linkToStatic: -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); -+ } -+ __ load_heap_oop(xmethod, member_vmtarget); -+ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); -+ break; -+ -+ case vmIntrinsics::_linkToVirtual: -+ { -+ // same as TemplateTable::invokevirtual, -+ // minus the CP setup and profiling: -+ -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); -+ } -+ -+ // pick out the vtable index from the MemberName, and then we can discard it: -+ Register temp2_index = temp2; -+ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); -+ -+ if (VerifyMethodHandles) { -+ Label L_index_ok; -+ __ bgez(temp2_index, L_index_ok); -+ __ ebreak(); -+ __ BIND(L_index_ok); -+ } -+ -+ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget -+ // at this point. And VerifyMethodHandles has already checked clazz, if needed. -+ -+ // get target Method* & entry point -+ __ lookup_virtual_method(temp1_recv_klass, temp2_index, xmethod); -+ break; -+ } -+ -+ case vmIntrinsics::_linkToInterface: -+ { -+ // same as TemplateTable::invokeinterface -+ // (minus the CP setup and profiling, with different argument motion) -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); -+ } -+ -+ Register temp3_intf = temp3; -+ __ load_heap_oop(temp3_intf, member_clazz); -+ load_klass_from_Class(_masm, temp3_intf); -+ __ verify_klass_ptr(temp3_intf); -+ -+ Register rindex = xmethod; -+ __ access_load_at(T_ADDRESS, IN_HEAP, rindex, member_vmindex, noreg, noreg); -+ if (VerifyMethodHandles) { -+ Label L; -+ __ bgez(rindex, L); -+ __ ebreak(); -+ __ bind(L); -+ } ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); 
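(Aside, not part of the patch: a quick cross-reference for the register definitions above. The standard RISC-V ABI names and the HotSpot aliases printed by RegisterImpl::name() in register_riscv.cpp later in this patch line up as sketched below; the standalone program and its kAliases table are invented here purely for illustration.)

#include <cstdio>

// Illustration only: encoding -> RISC-V ABI name -> alias used by the HotSpot
// RISC-V port (the aliases follow the name() table in register_riscv.cpp).
struct RegAlias { int encoding; const char* abi; const char* hotspot; };

static const RegAlias kAliases[] = {
  {  0, "zero",  "zr"      },
  {  1, "ra",    "ra"      },
  {  2, "sp",    "sp"      },
  {  5, "t0",    "t0"      },  // scratch register, heavily used by the port
  {  8, "s0/fp", "fp"      },
  { 10, "a0",    "c_rarg0" },  // first C argument/result register
  { 17, "a7",    "c_rarg7" },
  { 31, "t6",    "xmethod" },  // holds the current Method* in interpreted code
};

int main() {
  for (const RegAlias& r : kAliases) {
    std::printf("x%-2d  %-6s %s\n", r.encoding, r.abi, r.hotspot);
  }
  return 0;
}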
+ -+ // given intf, index, and recv klass, dispatch to the implementation method -+ __ lookup_interface_method(temp1_recv_klass, temp3_intf, -+ // note: next two args must be the same: -+ rindex, xmethod, -+ temp2, -+ L_incompatible_class_change_error); -+ break; -+ } ++REGISTER_DEFINITION(VectorRegister, vnoreg); + -+ default: -+ fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); -+ break; -+ } ++REGISTER_DEFINITION(VectorRegister, v0); ++REGISTER_DEFINITION(VectorRegister, v1); ++REGISTER_DEFINITION(VectorRegister, v2); ++REGISTER_DEFINITION(VectorRegister, v3); ++REGISTER_DEFINITION(VectorRegister, v4); ++REGISTER_DEFINITION(VectorRegister, v5); ++REGISTER_DEFINITION(VectorRegister, v6); ++REGISTER_DEFINITION(VectorRegister, v7); ++REGISTER_DEFINITION(VectorRegister, v8); ++REGISTER_DEFINITION(VectorRegister, v9); ++REGISTER_DEFINITION(VectorRegister, v10); ++REGISTER_DEFINITION(VectorRegister, v11); ++REGISTER_DEFINITION(VectorRegister, v12); ++REGISTER_DEFINITION(VectorRegister, v13); ++REGISTER_DEFINITION(VectorRegister, v14); ++REGISTER_DEFINITION(VectorRegister, v15); ++REGISTER_DEFINITION(VectorRegister, v16); ++REGISTER_DEFINITION(VectorRegister, v17); ++REGISTER_DEFINITION(VectorRegister, v18); ++REGISTER_DEFINITION(VectorRegister, v19); ++REGISTER_DEFINITION(VectorRegister, v20); ++REGISTER_DEFINITION(VectorRegister, v21); ++REGISTER_DEFINITION(VectorRegister, v22); ++REGISTER_DEFINITION(VectorRegister, v23); ++REGISTER_DEFINITION(VectorRegister, v24); ++REGISTER_DEFINITION(VectorRegister, v25); ++REGISTER_DEFINITION(VectorRegister, v26); ++REGISTER_DEFINITION(VectorRegister, v27); ++REGISTER_DEFINITION(VectorRegister, v28); ++REGISTER_DEFINITION(VectorRegister, v29); ++REGISTER_DEFINITION(VectorRegister, v30); ++REGISTER_DEFINITION(VectorRegister, v31); + -+ // live at this point: xmethod, x30 (if interpreted) ++REGISTER_DEFINITION(Register, c_rarg0); ++REGISTER_DEFINITION(Register, c_rarg1); ++REGISTER_DEFINITION(Register, c_rarg2); ++REGISTER_DEFINITION(Register, c_rarg3); ++REGISTER_DEFINITION(Register, c_rarg4); ++REGISTER_DEFINITION(Register, c_rarg5); ++REGISTER_DEFINITION(Register, c_rarg6); ++REGISTER_DEFINITION(Register, c_rarg7); + -+ // After figuring out which concrete method to call, jump into it. -+ // Note that this works in the interpreter with no data motion. -+ // But the compiled version will require that r2_recv be shifted out. 
-+ __ verify_method_ptr(xmethod); -+ jump_from_method_handle(_masm, xmethod, temp1, for_compiler_entry); -+ if (iid == vmIntrinsics::_linkToInterface) { -+ __ bind(L_incompatible_class_change_error); -+ __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); -+ } -+ } ++REGISTER_DEFINITION(FloatRegister, c_farg0); ++REGISTER_DEFINITION(FloatRegister, c_farg1); ++REGISTER_DEFINITION(FloatRegister, c_farg2); ++REGISTER_DEFINITION(FloatRegister, c_farg3); ++REGISTER_DEFINITION(FloatRegister, c_farg4); ++REGISTER_DEFINITION(FloatRegister, c_farg5); ++REGISTER_DEFINITION(FloatRegister, c_farg6); ++REGISTER_DEFINITION(FloatRegister, c_farg7); + -+} ++REGISTER_DEFINITION(Register, j_rarg0); ++REGISTER_DEFINITION(Register, j_rarg1); ++REGISTER_DEFINITION(Register, j_rarg2); ++REGISTER_DEFINITION(Register, j_rarg3); ++REGISTER_DEFINITION(Register, j_rarg4); ++REGISTER_DEFINITION(Register, j_rarg5); ++REGISTER_DEFINITION(Register, j_rarg6); ++REGISTER_DEFINITION(Register, j_rarg7); + -+#ifndef PRODUCT -+void trace_method_handle_stub(const char* adaptername, -+ oopDesc* mh, -+ intptr_t* saved_regs, -+ intptr_t* entry_sp) { } ++REGISTER_DEFINITION(FloatRegister, j_farg0); ++REGISTER_DEFINITION(FloatRegister, j_farg1); ++REGISTER_DEFINITION(FloatRegister, j_farg2); ++REGISTER_DEFINITION(FloatRegister, j_farg3); ++REGISTER_DEFINITION(FloatRegister, j_farg4); ++REGISTER_DEFINITION(FloatRegister, j_farg5); ++REGISTER_DEFINITION(FloatRegister, j_farg6); ++REGISTER_DEFINITION(FloatRegister, j_farg7); + -+// The stub wraps the arguments in a struct on the stack to avoid -+// dealing with the different calling conventions for passing 6 -+// arguments. -+struct MethodHandleStubArguments { -+ const char* adaptername; -+ oopDesc* mh; -+ intptr_t* saved_regs; -+ intptr_t* entry_sp; -+}; -+void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { } ++REGISTER_DEFINITION(Register, zr); ++REGISTER_DEFINITION(Register, gp); ++REGISTER_DEFINITION(Register, tp); ++REGISTER_DEFINITION(Register, xmethod); ++REGISTER_DEFINITION(Register, ra); ++REGISTER_DEFINITION(Register, sp); ++REGISTER_DEFINITION(Register, fp); ++REGISTER_DEFINITION(Register, xheapbase); ++REGISTER_DEFINITION(Register, xcpool); ++REGISTER_DEFINITION(Register, xmonitors); ++REGISTER_DEFINITION(Register, xlocals); ++REGISTER_DEFINITION(Register, xthread); ++REGISTER_DEFINITION(Register, xbcp); ++REGISTER_DEFINITION(Register, xdispatch); ++REGISTER_DEFINITION(Register, esp); + -+void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { } -+#endif //PRODUCT -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp ++REGISTER_DEFINITION(Register, t0); ++REGISTER_DEFINITION(Register, t1); ++REGISTER_DEFINITION(Register, t2); +diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp new file mode 100644 -index 00000000000..f73aba29d67 +index 0000000000..ef60cb3bb0 --- /dev/null -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -@@ -0,0 +1,57 @@ ++++ b/src/hotspot/cpu/riscv/register_riscv.cpp +@@ -0,0 +1,64 @@ +/* -+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -28113,47 +27042,53 @@ index 00000000000..f73aba29d67 + * + */ + -+// Platform-specific definitions for method handles. -+// These definitions are inlined into class MethodHandles. ++#include "precompiled.hpp" ++#include "register_riscv.hpp" + -+// Adapters -+enum /* platform_dependent_constants */ { -+ adapter_code_size = 32000 DEBUG_ONLY(+ 120000) -+}; ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * ++ RegisterImpl::max_slots_per_register; + -+public: ++const int ConcreteRegisterImpl::max_fpr = ++ ConcreteRegisterImpl::max_gpr + ++ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; + -+ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++const char* RegisterImpl::name() const { ++ static const char *const names[number_of_registers] = { ++ "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", ++ "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", ++ "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals", ++ "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} + -+ static void verify_klass(MacroAssembler* _masm, -+ Register obj, vmClassID klass_id, -+ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++const char* FloatRegisterImpl::name() const { ++ static const char *const names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} + -+ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { -+ verify_klass(_masm, mh_reg, VM_CLASS_ID(java_lang_invoke_MethodHandle), -+ "reference is a MH"); -+ } -+ -+ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; -+ -+ // Similar to InterpreterMacroAssembler::jump_from_interpreted. -+ // Takes care of special dispatch from single stepping too. -+ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, -+ bool for_compiler_entry); -+ -+ static void jump_to_lambda_form(MacroAssembler* _masm, -+ Register recv, Register method_temp, -+ Register temp2, -+ bool for_compiler_entry); -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp ++const char* VectorRegisterImpl::name() const { ++ static const char *const names[number_of_registers] = { ++ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", ++ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", ++ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", ++ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} +diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp new file mode 100644 -index 00000000000..0a05c577860 +index 0000000000..f64a06eb89 --- /dev/null -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -@@ -0,0 +1,429 @@ ++++ b/src/hotspot/cpu/riscv/register_riscv.hpp +@@ -0,0 +1,381 @@ +/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. 
-+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -28177,419 +27112,489 @@ index 00000000000..0a05c577860 + * + */ + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "code/compiledIC.hpp" -+#include "memory/resourceArea.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/oop.inline.hpp" -+#include "runtime/handles.hpp" -+#include "runtime/orderAccess.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "utilities/ostream.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_Runtime1.hpp" -+#endif ++#ifndef CPU_RISCV_REGISTER_RISCV_HPP ++#define CPU_RISCV_REGISTER_RISCV_HPP + -+Register NativeInstruction::extract_rs1(address instr) { -+ assert_cond(instr != NULL); -+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 19, 15)); -+} ++#include "asm/register.hpp" + -+Register NativeInstruction::extract_rs2(address instr) { -+ assert_cond(instr != NULL); -+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 24, 20)); -+} ++#define CSR_FFLAGS 0x001 // Floating-Point Accrued Exceptions. ++#define CSR_FRM 0x002 // Floating-Point Dynamic Rounding Mode. ++#define CSR_FCSR 0x003 // Floating-Point Control and Status Register (frm + fflags). ++#define CSR_VSTART 0x008 // Vector start position ++#define CSR_VXSAT 0x009 // Fixed-Point Saturate Flag ++#define CSR_VXRM 0x00A // Fixed-Point Rounding Mode ++#define CSR_VCSR 0x00F // Vector control and status register ++#define CSR_VL 0xC20 // Vector length ++#define CSR_VTYPE 0xC21 // Vector data type register ++#define CSR_VLENB 0xC22 // VLEN/8 (vector register length in bytes) ++#define CSR_CYCLE 0xc00 // Cycle counter for RDCYCLE instruction. ++#define CSR_TIME 0xc01 // Timer for RDTIME instruction. ++#define CSR_INSTERT 0xc02 // Instructions-retired counter for RDINSTRET instruction. 
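(Aside, not part of the patch: a minimal sketch of how the CSR numbers defined above are meant to be consumed. It assumes a riscv64 build running on a hart that actually implements the V extension, since reading vlenb traps otherwise; the helper name read_vlenb is made up for the example.)

#include <cstdint>
#include <cstdio>

// csrr reads a control-and-status register by its address; 0xC22 is CSR_VLENB,
// the vector register length in bytes.
static inline uint64_t read_vlenb() {
  uint64_t value;
  __asm__ volatile("csrr %0, 0xC22" : "=r"(value));
  return value;
}

int main() {
  // VLEN in bits = vlenb * 8; only meaningful when the V extension is present.
  std::printf("VLEN = %llu bits\n", (unsigned long long)(read_vlenb() * 8));
  return 0;
}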
+ -+Register NativeInstruction::extract_rd(address instr) { -+ assert_cond(instr != NULL); -+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 11, 7)); -+} ++class VMRegImpl; ++typedef VMRegImpl* VMReg; + -+uint32_t NativeInstruction::extract_opcode(address instr) { -+ assert_cond(instr != NULL); -+ return Assembler::extract(((unsigned*)instr)[0], 6, 0); -+} ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; + -+uint32_t NativeInstruction::extract_funct3(address instr) { -+ assert_cond(instr != NULL); -+ return Assembler::extract(((unsigned*)instr)[0], 14, 12); ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; +} + -+bool NativeInstruction::is_pc_relative_at(address instr) { -+ // auipc + jalr -+ // auipc + addi -+ // auipc + load -+ // auipc + fload_load -+ return (is_auipc_at(instr)) && -+ (is_addi_at(instr + instruction_size) || -+ is_jalr_at(instr + instruction_size) || -+ is_load_at(instr + instruction_size) || -+ is_float_load_at(instr + instruction_size)) && -+ check_pc_relative_data_dependency(instr); -+} ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2, + -+// ie:ld(Rd, Label) -+bool NativeInstruction::is_load_pc_relative_at(address instr) { -+ return is_auipc_at(instr) && // auipc -+ is_ld_at(instr + instruction_size) && // ld -+ check_load_pc_relative_data_dependency(instr); -+} ++ // integer registers x8 - x15 and floating-point registers f8 - f15 are allocatable ++ // for compressed instructions. See Table 17.2 in spec. ++ compressed_register_base = 8, ++ compressed_register_top = 15, ++ }; + -+bool NativeInstruction::is_movptr_at(address instr) { -+ return is_lui_at(instr) && // Lui -+ is_addi_at(instr + instruction_size) && // Addi -+ is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 -+ is_addi_at(instr + instruction_size * 3) && // Addi -+ is_slli_shift_at(instr + instruction_size * 4, 5) && // Slli Rd, Rs, 5 -+ (is_addi_at(instr + instruction_size * 5) || -+ is_jalr_at(instr + instruction_size * 5) || -+ is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load -+ check_movptr_data_dependency(instr); -+} ++ // derived registers, offsets, and addresses ++ const Register successor() const { return as_Register(encoding() + 1); } + -+bool NativeInstruction::is_li32_at(address instr) { -+ return is_lui_at(instr) && // lui -+ is_addiw_at(instr + instruction_size) && // addiw -+ check_li32_data_dependency(instr); -+} ++ // construction ++ inline friend Register as_Register(int encoding); + -+bool NativeInstruction::is_li64_at(address instr) { -+ return is_lui_at(instr) && // lui -+ is_addi_at(instr + instruction_size) && // addi -+ is_slli_shift_at(instr + instruction_size * 2, 12) && // Slli Rd, Rs, 12 -+ is_addi_at(instr + instruction_size * 3) && // addi -+ is_slli_shift_at(instr + instruction_size * 4, 12) && // Slli Rd, Rs, 12 -+ is_addi_at(instr + instruction_size * 5) && // addi -+ is_slli_shift_at(instr + instruction_size * 6, 8) && // Slli Rd, Rs, 8 -+ is_addi_at(instr + instruction_size * 7) && // addi -+ check_li64_data_dependency(instr); -+} ++ VMReg as_VMReg() const; + -+void NativeCall::verify() { -+ assert(NativeCall::is_call_at((address)this), "unexpected code at call site"); -+} ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ bool is_valid() const { 
return (unsigned)encoding_nocheck() < number_of_registers; } ++ const char* name() const; + -+address NativeCall::destination() const { -+ address addr = (address)this; -+ assert(NativeInstruction::is_jal_at(instruction_address()), "inst must be jal."); -+ address destination = MacroAssembler::target_addr_for_insn(instruction_address()); ++ // for rvc ++ int compressed_encoding() const { ++ assert(is_compressed_valid(), "invalid compressed register"); ++ return encoding() - compressed_register_base; ++ } + -+ // Do we use a trampoline stub for this call? -+ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. -+ assert(cb && cb->is_nmethod(), "sanity"); -+ nmethod *nm = (nmethod *)cb; -+ if (nm != NULL && nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) { -+ // Yes we do, so get the destination from the trampoline stub. -+ const address trampoline_stub_addr = destination; -+ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ int compressed_encoding_nocheck() const { ++ return encoding_nocheck() - compressed_register_base; + } + -+ return destination; -+} ++ bool is_compressed_valid() const { ++ return encoding_nocheck() >= compressed_register_base && ++ encoding_nocheck() <= compressed_register_top; ++ } + -+// Similar to replace_mt_safe, but just changes the destination. The -+// important thing is that free-running threads are able to execute this -+// call instruction at all times. -+// -+// Used in the runtime linkage of calls; see class CompiledIC. -+// -+// Add parameter assert_lock to switch off assertion -+// during code generation, where no patching lock is needed. -+void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { -+ assert(!assert_lock || -+ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) || -+ CompiledICLocker::is_safe(addr_at(0)), -+ "concurrent code patching"); ++ // Return the bit which represents this register. This is intended ++ // to be ORed into a bitmask: for usage see class RegSet below. ++ uint64_t bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } ++}; + -+ ResourceMark rm; -+ address addr_call = addr_at(0); -+ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++// The integer registers of the RISCV architecture + -+ // Patch the constant in the call's trampoline stub. -+ address trampoline_stub_addr = get_trampoline(); -+ if (trampoline_stub_addr != NULL) { -+ assert (!is_NativeCallTrampolineStub_at(dest), "chained trampolines"); -+ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); -+ } ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + -+ // Patch the call. 
-+ if (Assembler::reachable_from_branch_at(addr_call, dest)) { -+ set_destination(dest); -+ } else { -+ assert (trampoline_stub_addr != NULL, "we need a trampoline"); -+ set_destination(trampoline_stub_addr); -+ } ++CONSTANT_REGISTER_DECLARATION(Register, x0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, x1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, x2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, x3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, x4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, x5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, x6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, x7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, x8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, x9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, x10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, x11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, x12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, x13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, x14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, x15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, x16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, x17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, x18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, x19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, x20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, x21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, x22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, x23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, x24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, x25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, x26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, x27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, x28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, x29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, x30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, x31, (31)); + -+ ICache::invalidate_range(addr_call, instruction_size); -+} ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; + -+address NativeCall::get_trampoline() { -+ address call_addr = addr_at(0); ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} + -+ CodeBlob *code = CodeCache::find_blob(call_addr); -+ assert(code != NULL, "Could not find the containing code blob"); ++// The implementation of floating point registers for the architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2, + -+ address jal_destination = MacroAssembler::pd_call_destination(call_addr); -+ if (code != NULL && code->contains(jal_destination) && is_NativeCallTrampolineStub_at(jal_destination)) { -+ return jal_destination; -+ } ++ // float registers in the range of [f8~f15] correspond to RVC. Please see Table 16.2 in spec. 
++ compressed_register_base = 8, ++ compressed_register_top = 15, ++ }; + -+ if (code != NULL && code->is_nmethod()) { -+ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); -+ } ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); + -+ return NULL; -+} ++ VMReg as_VMReg() const; + -+// Inserts a native call instruction at a given pc -+void NativeCall::insert(address code_pos, address entry) { Unimplemented(); } ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + -+//------------------------------------------------------------------- ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } ++ const char* name() const; + -+void NativeMovConstReg::verify() { -+ if (!(nativeInstruction_at(instruction_address())->is_movptr() || -+ is_auipc_at(instruction_address()))) { -+ fatal("should be MOVPTR or AUIPC"); ++ // for rvc ++ int compressed_encoding() const { ++ assert(is_compressed_valid(), "invalid compressed register"); ++ return encoding() - compressed_register_base; + } -+} + -+intptr_t NativeMovConstReg::data() const { -+ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); -+ if (maybe_cpool_ref(instruction_address())) { -+ return *(intptr_t*)addr; -+ } else { -+ return (intptr_t)addr; ++ int compressed_encoding_nocheck() const { ++ return encoding_nocheck() - compressed_register_base; + } -+} + -+void NativeMovConstReg::set_data(intptr_t x) { -+ if (maybe_cpool_ref(instruction_address())) { -+ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); -+ *(intptr_t*)addr = x; -+ } else { -+ // Store x into the instruction stream. -+ MacroAssembler::pd_patch_instruction_size(instruction_address(), (address)x); -+ ICache::invalidate_range(instruction_address(), movptr_instruction_size); ++ bool is_compressed_valid() const { ++ return encoding_nocheck() >= compressed_register_base && ++ encoding_nocheck() <= compressed_register_top; + } ++}; + -+ // Find and replace the oop/metadata corresponding to this -+ // instruction in oops section. 
-+ CodeBlob* cb = CodeCache::find_blob(instruction_address()); -+ nmethod* nm = cb->as_nmethod_or_null(); -+ if (nm != NULL) { -+ RelocIterator iter(nm, instruction_address(), next_instruction_address()); -+ while (iter.next()) { -+ if (iter.type() == relocInfo::oop_type) { -+ oop* oop_addr = iter.oop_reloc()->oop_addr(); -+ *oop_addr = cast_to_oop(x); -+ break; -+ } else if (iter.type() == relocInfo::metadata_type) { -+ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); -+ *metadata_addr = (Metadata*)x; -+ break; -+ } -+ } -+ } -+} ++// The float registers of the RISCV architecture + -+void NativeMovConstReg::print() { -+ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, -+ p2i(instruction_address()), data()); -+} ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); + -+//------------------------------------------------------------------- ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); + -+int NativeMovRegMem::offset() const { -+ Unimplemented(); -+ return 0; ++// Use VectorRegister as shortcut ++class VectorRegisterImpl; ++typedef VectorRegisterImpl* VectorRegister; ++ ++inline VectorRegister as_VectorRegister(int encoding) { ++ return (VectorRegister)(intptr_t) encoding; +} + -+void NativeMovRegMem::set_offset(int x) { Unimplemented(); } ++// The implementation of vector registers for RVV ++class VectorRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 4 ++ }; + -+void NativeMovRegMem::verify() { -+ Unimplemented(); -+} ++ // construction ++ inline friend VectorRegister as_VectorRegister(int encoding); + 
-+//-------------------------------------------------------------------------------- ++ VMReg as_VMReg() const; + -+void NativeJump::verify() { } ++ // derived registers, offsets, and addresses ++ VectorRegister successor() const { return as_VectorRegister(encoding() + 1); } + ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } ++ const char* name() const; + -+void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { -+} ++}; + ++// The vector registers of RVV ++CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1)); + -+address NativeJump::jump_destination() const { -+ address dest = MacroAssembler::target_addr_for_insn(instruction_address()); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v10 , (10)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v11 , (11)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v12 , (12)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v13 , (13)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v14 , (14)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v15 , (15)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v16 , (16)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v17 , (17)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v18 , (18)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v19 , (19)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v20 , (20)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v21 , (21)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v22 , (22)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v23 , (23)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v24 , (24)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v25 , (25)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v26 , (26)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v27 , (27)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v28 , (28)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v29 , (29)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v30 , (30)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v31 , (31)); + -+ // We use jump to self as the unresolved address which the inline -+ // cache code (and relocs) know about -+ // As a special case we also use sequence movptr_with_offset(r,0), jalr(r,0) -+ // i.e. jump to 0 when we need leave space for a wide immediate -+ // load + -+ // return -1 if jump to self or to 0 -+ if ((dest == (address) this) || dest == 0) { -+ dest = (address) -1; -+ } ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. 
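(Editor's note, illustrative only, not part of the patch: assuming RegisterImpl, like FloatRegisterImpl above, declares 32 registers with max_slots_per_register = 2, the slot count computed by the class just below works out to

    number_of_registers = 2 * 32 (GPR slots) + 2 * 32 (FPR slots) = 128

which is the bound that C2's REG_COUNT must stay within, as the in-code comment notes.)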
++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. + -+ return dest; ++ number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers) ++ }; ++ ++ // added to make it compile ++ static const int max_gpr; ++ static const int max_fpr; +}; + -+void NativeJump::set_jump_destination(address dest) { -+ // We use jump to self as the unresolved address which the inline -+ // cache code (and relocs) know about -+ if (dest == (address) -1) -+ dest = instruction_address(); ++// A set of registers ++class RegSet { ++ uint32_t _bitset; + -+ MacroAssembler::pd_patch_instruction(instruction_address(), dest); -+ ICache::invalidate_range(instruction_address(), instruction_size); -+} ++ RegSet(uint32_t bitset) : _bitset(bitset) { } + -+//------------------------------------------------------------------- ++public: + -+address NativeGeneralJump::jump_destination() const { -+ NativeMovConstReg* move = nativeMovConstReg_at(instruction_address()); -+ address dest = (address) move->data(); ++ RegSet() : _bitset(0) { } + -+ // We use jump to self as the unresolved address which the inline -+ // cache code (and relocs) know about -+ // As a special case we also use jump to 0 when first generating -+ // a general jump ++ RegSet(Register r1) : _bitset(r1->bit()) { } + -+ // return -1 if jump to self or to 0 -+ if ((dest == (address) this) || dest == 0) { -+ dest = (address) -1; ++ RegSet operator+(const RegSet aSet) const { ++ RegSet result(_bitset | aSet._bitset); ++ return result; + } + -+ return dest; -+} ++ RegSet operator-(const RegSet aSet) const { ++ RegSet result(_bitset & ~aSet._bitset); ++ return result; ++ } + -+//------------------------------------------------------------------- ++ RegSet &operator+=(const RegSet aSet) { ++ *this = *this + aSet; ++ return *this; ++ } + -+bool NativeInstruction::is_safepoint_poll() { -+ return is_lwu_to_zr(address(this)); -+} ++ RegSet &operator-=(const RegSet aSet) { ++ *this = *this - aSet; ++ return *this; ++ } + -+bool NativeInstruction::is_lwu_to_zr(address instr) { -+ assert_cond(instr != NULL); -+ return (extract_opcode(instr) == 0b0000011 && -+ extract_funct3(instr) == 0b110 && -+ extract_rd(instr) == zr); // zr -+} -+ -+// A 16-bit instruction with all bits ones is permanently reserved as an illegal instruction. 
-+bool NativeInstruction::is_sigill_zombie_not_entrant() { -+ // jvmci -+ return uint_at(0) == 0xffffffff; -+} -+ -+void NativeIllegalInstruction::insert(address code_pos) { -+ assert_cond(code_pos != NULL); -+ *(juint*)code_pos = 0xffffffff; // all bits ones is permanently reserved as an illegal instruction -+} ++ static RegSet of(Register r1) { ++ return RegSet(r1); ++ } + -+bool NativeInstruction::is_stop() { -+ return uint_at(0) == 0xffffffff; // an illegal instruction -+} ++ static RegSet of(Register r1, Register r2) { ++ return of(r1) + r2; ++ } + -+//------------------------------------------------------------------- ++ static RegSet of(Register r1, Register r2, Register r3) { ++ return of(r1, r2) + r3; ++ } + -+// MT-safe inserting of a jump over a jump or a nop (used by -+// nmethod::make_not_entrant_or_zombie) ++ static RegSet of(Register r1, Register r2, Register r3, Register r4) { ++ return of(r1, r2, r3) + r4; ++ } + -+void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ static RegSet range(Register start, Register end) { ++ uint32_t bits = ~0; ++ bits <<= start->encoding(); ++ bits <<= 31 - end->encoding(); ++ bits >>= 31 - end->encoding(); + -+ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ return RegSet(bits); ++ } + -+ assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() || -+ nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(), -+ "riscv cannot replace non-jump with jump"); ++ uint32_t bits() const { return _bitset; } + -+ // Patch this nmethod atomically. -+ if (Assembler::reachable_from_branch_at(verified_entry, dest)) { -+ ptrdiff_t offset = dest - verified_entry; -+ guarantee(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction."); // 1M ++private: + -+ uint32_t insn = 0; -+ address pInsn = (address)&insn; -+ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); -+ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); -+ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); -+ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); -+ Assembler::patch(pInsn, 11, 7, 0); // zero, no link jump -+ Assembler::patch(pInsn, 6, 0, 0b1101111); // j, (jal x0 offset) -+ *(unsigned int*)verified_entry = insn; -+ } else { -+ // We use an illegal instruction for marking a method as -+ // not_entrant or zombie. -+ NativeIllegalInstruction::insert(verified_entry); ++ Register first() { ++ uint32_t first = _bitset & -_bitset; ++ return first ? as_Register(exact_log2(first)) : noreg; + } ++}; + -+ ICache::invalidate_range(verified_entry, instruction_size); -+} ++#endif // CPU_RISCV_REGISTER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +new file mode 100644 +index 0000000000..047ea2276c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +@@ -0,0 +1,112 @@ ++/* ++ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { -+ CodeBuffer cb(code_pos, instruction_size); -+ MacroAssembler a(&cb); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" + -+ int32_t offset = 0; -+ a.movptr_with_offset(t0, entry, offset); // lui, addi, slli, addi, slli -+ a.jalr(x0, t0, offset); // jalr ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ if (verify_only) { ++ return; ++ } + -+ ICache::invalidate_range(code_pos, instruction_size); -+} ++ int bytes; + -+// MT-safe patching of a long jump instruction. -+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { -+ ShouldNotCallThis(); ++ switch (type()) { ++ case relocInfo::oop_type: { ++ oop_Relocation *reloc = (oop_Relocation *)this; ++ if (NativeInstruction::is_load_pc_relative_at(addr())) { ++ address constptr = (address)code()->oop_addr_at(reloc->oop_index()); ++ bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); ++ assert(*(address*)constptr == x, "error in oop relocation"); ++ } else { ++ bytes = MacroAssembler::patch_oop(addr(), x); ++ } ++ break; ++ } ++ default: ++ bytes = MacroAssembler::pd_patch_instruction_size(addr(), x); ++ break; ++ } ++ ICache::invalidate_range(addr(), bytes); +} + -+ -+address NativeCallTrampolineStub::destination(nmethod *nm) const { -+ return ptr_at(data_offset); ++address Relocation::pd_call_destination(address orig_addr) { ++ assert(is_call(), "should be an address instruction here"); ++ if (NativeCall::is_call_at(addr())) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline != NULL) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } ++ } ++ if (orig_addr != NULL) { ++ // the extracted address from the instructions in address orig_addr ++ address new_addr = MacroAssembler::pd_call_destination(orig_addr); ++ // If call is branch to self, don't try to relocate it, just leave it ++ // as branch to self. This happens during code generation if the code ++ // buffer expands. It will be relocated to the trampoline above once ++ // code generation is complete. ++ new_addr = (new_addr == orig_addr) ? 
addr() : new_addr; ++ return new_addr; ++ } ++ return MacroAssembler::pd_call_destination(addr()); +} + -+void NativeCallTrampolineStub::set_destination(address new_destination) { -+ set_ptr_at(data_offset, new_destination); -+ OrderAccess::release(); ++void Relocation::pd_set_call_destination(address x) { ++ assert(is_call(), "should be an address instruction here"); ++ if (NativeCall::is_call_at(addr())) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline != NULL) { ++ nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false); ++ return; ++ } ++ } ++ MacroAssembler::pd_patch_instruction_size(addr(), x); ++ address pd_call = pd_call_destination(addr()); ++ assert(pd_call == x, "fail in reloc"); +} + -+uint32_t NativeMembar::get_kind() { -+ uint32_t insn = uint_at(0); -+ -+ uint32_t predecessor = Assembler::extract(insn, 27, 24); -+ uint32_t successor = Assembler::extract(insn, 23, 20); -+ -+ return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor); ++address* Relocation::pd_address_in_code() { ++ assert(NativeCall::is_load_pc_relative_at(addr()), "Not the expected instruction sequence!"); ++ return (address*)(MacroAssembler::target_addr_for_insn(addr())); +} + -+void NativeMembar::set_kind(uint32_t order_kind) { -+ uint32_t predecessor = 0; -+ uint32_t successor = 0; -+ -+ MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor); ++address Relocation::pd_get_address_from_code() { ++ return MacroAssembler::pd_call_destination(addr()); ++} + -+ uint32_t insn = uint_at(0); -+ address pInsn = (address) &insn; -+ Assembler::patch(pInsn, 27, 24, predecessor); -+ Assembler::patch(pInsn, 23, 20, successor); ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++ if (NativeInstruction::maybe_cpool_ref(addr())) { ++ address old_addr = old_addr_for(addr(), src, dest); ++ MacroAssembler::pd_patch_instruction_size(addr(), MacroAssembler::target_addr_for_insn(old_addr)); ++ } ++} + -+ address membar = addr_at(0); -+ *(unsigned int*) membar = insn; ++void metadata_Relocation::pd_fix_value(address x) { +} -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp new file mode 100644 -index 00000000000..718b2e3de6c +index 0000000000..840ed935d8 --- /dev/null -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp -@@ -0,0 +1,572 @@ ++++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp +@@ -0,0 +1,44 @@ +/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -28612,7635 +27617,6143 @@ index 00000000000..718b2e3de6c + * + */ + -+#ifndef CPU_RISCV_NATIVEINST_RISCV_HPP -+#define CPU_RISCV_NATIVEINST_RISCV_HPP -+ -+#include "asm/assembler.hpp" -+#include "runtime/icache.hpp" -+#include "runtime/os.hpp" -+ -+// We have interfaces for the following instructions: -+// - NativeInstruction -+// - - NativeCall -+// - - NativeMovConstReg -+// - - NativeMovRegMem -+// - - NativeJump -+// - - NativeGeneralJump -+// - - NativeIllegalInstruction -+// - - NativeCallTrampolineStub -+// - - NativeMembar -+// - - NativeFenceI -+ -+// The base class for different kinds of native instruction abstractions. -+// Provides the primitive operations to manipulate code relative to this. -+ -+class NativeCall; ++#ifndef CPU_RISCV_RELOCINFO_RISCV_HPP ++#define CPU_RISCV_RELOCINFO_RISCV_HPP + -+class NativeInstruction { -+ friend class Relocation; -+ friend bool is_NativeCallTrampolineStub_at(address); -+ public: ++ // machine-dependent parts of class relocInfo ++ private: + enum { -+ instruction_size = 4, -+ compressed_instruction_size = 2, ++ // Relocations are byte-aligned. ++ offset_unit = 1, ++ // Must be at least 1 for RelocInfo::narrow_oop_in_const. ++ format_width = 1 + }; + -+ juint encoding() const { -+ return uint_at(0); -+ } ++ public: + -+ bool is_jal() const { return is_jal_at(addr_at(0)); } -+ bool is_movptr() const { return is_movptr_at(addr_at(0)); } -+ bool is_call() const { return is_call_at(addr_at(0)); } -+ bool is_jump() const { return is_jump_at(addr_at(0)); } ++ // This platform has no oops in the code that are not also ++ // listed in the oop section. ++ static bool mustIterateImmediateOopsInCode() { return false; } + -+ static bool is_jal_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1101111; } -+ static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; } -+ static bool is_branch_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100011; } -+ static bool is_ld_at(address instr) { assert_cond(instr != NULL); return is_load_at(instr) && extract_funct3(instr) == 0b011; } -+ static bool is_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000011; } -+ static bool is_float_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000111; } -+ static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010111; } -+ static bool is_jump_at(address instr) { assert_cond(instr != NULL); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); } -+ static bool is_addi_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; } -+ static bool is_addiw_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; } -+ static bool is_lui_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0110111; } -+ static bool is_slli_shift_at(address instr, uint32_t shift) { -+ assert_cond(instr != NULL); -+ return (extract_opcode(instr) == 0b0010011 && // opcode field -+ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation -+ Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field -+ } ++#endif // 
CPU_RISCV_RELOCINFO_RISCV_HPP
+diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
+new file mode 100644
+index 0000000000..02d6167629
+--- /dev/null
++++ b/src/hotspot/cpu/riscv/riscv.ad
+@@ -0,0 +1,10280 @@
++//
++// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
++// Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
++// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++//
++// This code is free software; you can redistribute it and/or modify it
++// under the terms of the GNU General Public License version 2 only, as
++// published by the Free Software Foundation.
++//
++// This code is distributed in the hope that it will be useful, but WITHOUT
++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++// version 2 for more details (a copy is included in the LICENSE file that
++// accompanied this code).
++//
++// You should have received a copy of the GNU General Public License version
++// 2 along with this work; if not, write to the Free Software Foundation,
++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++//
++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++// or visit www.oracle.com if you need additional information or have any
++// questions.
++//
++//
+
++// RISCV Architecture Description File
+
++//----------REGISTER DEFINITION BLOCK------------------------------------------
++// This information is used by the matcher and the register allocator to
++// describe individual registers and classes of registers within the target
++// architecture.
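(Editor's note, illustrative only, not part of the patch: as a quick orientation for the block that follows, each 64-bit general register is described to ADLC as a real low half plus a virtual high half, and the two halves are then grouped into an allocatable class. Using x10 as the example, the definitions further down in this file read:

    reg_def R10   ( SOC, SOC, Op_RegI, 10, x10->as_VMReg() );         // low 32-bit half of x10
    reg_def R10_H ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()->next());  // virtual high half
    reg_class r10_reg( R10, R10_H );                                   // full 64-bit x10 as one allocatable class

The save-type legend immediately below explains the NS/SOC/SOE/AS columns.)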
+ -+ // the instruction sequence of li64 is as below: -+ // lui -+ // addi -+ // slli -+ // addi -+ // slli -+ // addi -+ // slli -+ // addi -+ static bool check_li64_data_dependency(address instr) { -+ address lui = instr; -+ address addi1 = lui + instruction_size; -+ address slli1 = addi1 + instruction_size; -+ address addi2 = slli1 + instruction_size; -+ address slli2 = addi2 + instruction_size; -+ address addi3 = slli2 + instruction_size; -+ address slli3 = addi3 + instruction_size; -+ address addi4 = slli3 + instruction_size; -+ return extract_rs1(addi1) == extract_rd(lui) && -+ extract_rs1(addi1) == extract_rd(addi1) && -+ extract_rs1(slli1) == extract_rd(addi1) && -+ extract_rs1(slli1) == extract_rd(slli1) && -+ extract_rs1(addi2) == extract_rd(slli1) && -+ extract_rs1(addi2) == extract_rd(addi2) && -+ extract_rs1(slli2) == extract_rd(addi2) && -+ extract_rs1(slli2) == extract_rd(slli2) && -+ extract_rs1(addi3) == extract_rd(slli2) && -+ extract_rs1(addi3) == extract_rd(addi3) && -+ extract_rs1(slli3) == extract_rd(addi3) && -+ extract_rs1(slli3) == extract_rd(slli3) && -+ extract_rs1(addi4) == extract_rd(slli3) && -+ extract_rs1(addi4) == extract_rd(addi4); -+ } ++register %{ ++//----------Architecture Description Register Definitions---------------------- ++// General Registers ++// "reg_def" name ( register save type, C convention save type, ++// ideal register type, encoding ); ++// Register Save Types: ++// ++// NS = No-Save: The register allocator assumes that these registers ++// can be used without saving upon entry to the method, & ++// that they do not need to be saved at call sites. ++// ++// SOC = Save-On-Call: The register allocator assumes that these registers ++// can be used without saving upon entry to the method, ++// but that they must be saved at call sites. ++// ++// SOE = Save-On-Entry: The register allocator assumes that these registers ++// must be saved before using them upon entry to the ++// method, but they do not need to be saved at call ++// sites. ++// ++// AS = Always-Save: The register allocator assumes that these registers ++// must be saved before using them upon entry to the ++// method, & that they must be saved at call sites. ++// ++// Ideal Register Type is used to determine how to save & restore a ++// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get ++// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. ++// ++// The encoding number is the actual bit-pattern placed into the opcodes. + -+ // the instruction sequence of li32 is as below: -+ // lui -+ // addiw -+ static bool check_li32_data_dependency(address instr) { -+ address lui = instr; -+ address addiw = lui + instruction_size; ++// We must define the 64 bit int registers in two 32 bit halves, the ++// real lower register and a virtual upper half register. upper halves ++// are used by the register allocator but are not actually supplied as ++// operands to memory ops. ++// ++// follow the C1 compiler in making registers ++// ++// x7, x9-x17, x27-x31 volatile (caller save) ++// x0-x4, x8, x23 system (no save, no allocate) ++// x5-x6 non-allocatable (so we can use them as temporary regs) + -+ return extract_rs1(addiw) == extract_rd(lui) && -+ extract_rs1(addiw) == extract_rd(addiw); -+ } ++// ++// as regards Java usage. 
we don't use any callee save registers ++// because this makes it difficult to de-optimise a frame (see comment ++// in x86 implementation of Deoptimization::unwind_callee_save_values) ++// + -+ // the instruction sequence of pc-relative is as below: -+ // auipc -+ // jalr/addi/load/float_load -+ static bool check_pc_relative_data_dependency(address instr) { -+ address auipc = instr; -+ address last_instr = auipc + instruction_size; ++// General Registers + -+ return extract_rs1(last_instr) == extract_rd(auipc); -+ } ++reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr ++reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() ); ++reg_def R1 ( NS, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra ++reg_def R1_H ( NS, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); ++reg_def R2 ( NS, SOE, Op_RegI, 2, x2->as_VMReg() ); // sp ++reg_def R2_H ( NS, SOE, Op_RegI, 2, x2->as_VMReg()->next() ); ++reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp ++reg_def R3_H ( NS, NS, Op_RegI, 3, x3->as_VMReg()->next() ); ++reg_def R4 ( NS, NS, Op_RegI, 4, x4->as_VMReg() ); // tp ++reg_def R4_H ( NS, NS, Op_RegI, 4, x4->as_VMReg()->next() ); ++reg_def R7 ( SOC, SOC, Op_RegI, 7, x7->as_VMReg() ); ++reg_def R7_H ( SOC, SOC, Op_RegI, 7, x7->as_VMReg()->next() ); ++reg_def R8 ( NS, SOE, Op_RegI, 8, x8->as_VMReg() ); // fp ++reg_def R8_H ( NS, SOE, Op_RegI, 8, x8->as_VMReg()->next() ); ++reg_def R9 ( SOC, SOE, Op_RegI, 9, x9->as_VMReg() ); ++reg_def R9_H ( SOC, SOE, Op_RegI, 9, x9->as_VMReg()->next() ); ++reg_def R10 ( SOC, SOC, Op_RegI, 10, x10->as_VMReg() ); ++reg_def R10_H ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()->next()); ++reg_def R11 ( SOC, SOC, Op_RegI, 11, x11->as_VMReg() ); ++reg_def R11_H ( SOC, SOC, Op_RegI, 11, x11->as_VMReg()->next()); ++reg_def R12 ( SOC, SOC, Op_RegI, 12, x12->as_VMReg() ); ++reg_def R12_H ( SOC, SOC, Op_RegI, 12, x12->as_VMReg()->next()); ++reg_def R13 ( SOC, SOC, Op_RegI, 13, x13->as_VMReg() ); ++reg_def R13_H ( SOC, SOC, Op_RegI, 13, x13->as_VMReg()->next()); ++reg_def R14 ( SOC, SOC, Op_RegI, 14, x14->as_VMReg() ); ++reg_def R14_H ( SOC, SOC, Op_RegI, 14, x14->as_VMReg()->next()); ++reg_def R15 ( SOC, SOC, Op_RegI, 15, x15->as_VMReg() ); ++reg_def R15_H ( SOC, SOC, Op_RegI, 15, x15->as_VMReg()->next()); ++reg_def R16 ( SOC, SOC, Op_RegI, 16, x16->as_VMReg() ); ++reg_def R16_H ( SOC, SOC, Op_RegI, 16, x16->as_VMReg()->next()); ++reg_def R17 ( SOC, SOC, Op_RegI, 17, x17->as_VMReg() ); ++reg_def R17_H ( SOC, SOC, Op_RegI, 17, x17->as_VMReg()->next()); ++reg_def R18 ( SOC, SOE, Op_RegI, 18, x18->as_VMReg() ); ++reg_def R18_H ( SOC, SOE, Op_RegI, 18, x18->as_VMReg()->next()); ++reg_def R19 ( SOC, SOE, Op_RegI, 19, x19->as_VMReg() ); ++reg_def R19_H ( SOC, SOE, Op_RegI, 19, x19->as_VMReg()->next()); ++reg_def R20 ( SOC, SOE, Op_RegI, 20, x20->as_VMReg() ); // caller esp ++reg_def R20_H ( SOC, SOE, Op_RegI, 20, x20->as_VMReg()->next()); ++reg_def R21 ( SOC, SOE, Op_RegI, 21, x21->as_VMReg() ); ++reg_def R21_H ( SOC, SOE, Op_RegI, 21, x21->as_VMReg()->next()); ++reg_def R22 ( SOC, SOE, Op_RegI, 22, x22->as_VMReg() ); ++reg_def R22_H ( SOC, SOE, Op_RegI, 22, x22->as_VMReg()->next()); ++reg_def R23 ( NS, SOE, Op_RegI, 23, x23->as_VMReg() ); // java thread ++reg_def R23_H ( NS, SOE, Op_RegI, 23, x23->as_VMReg()->next()); ++reg_def R24 ( SOC, SOE, Op_RegI, 24, x24->as_VMReg() ); ++reg_def R24_H ( SOC, SOE, Op_RegI, 24, x24->as_VMReg()->next()); ++reg_def R25 ( SOC, SOE, Op_RegI, 25, x25->as_VMReg() ); ++reg_def R25_H ( SOC, SOE, Op_RegI, 25, x25->as_VMReg()->next()); ++reg_def R26 ( SOC, SOE, 
Op_RegI, 26, x26->as_VMReg() ); ++reg_def R26_H ( SOC, SOE, Op_RegI, 26, x26->as_VMReg()->next()); ++reg_def R27 ( SOC, SOE, Op_RegI, 27, x27->as_VMReg() ); // heapbase ++reg_def R27_H ( SOC, SOE, Op_RegI, 27, x27->as_VMReg()->next()); ++reg_def R28 ( SOC, SOC, Op_RegI, 28, x28->as_VMReg() ); ++reg_def R28_H ( SOC, SOC, Op_RegI, 28, x28->as_VMReg()->next()); ++reg_def R29 ( SOC, SOC, Op_RegI, 29, x29->as_VMReg() ); ++reg_def R29_H ( SOC, SOC, Op_RegI, 29, x29->as_VMReg()->next()); ++reg_def R30 ( SOC, SOC, Op_RegI, 30, x30->as_VMReg() ); ++reg_def R30_H ( SOC, SOC, Op_RegI, 30, x30->as_VMReg()->next()); ++reg_def R31 ( SOC, SOC, Op_RegI, 31, x31->as_VMReg() ); ++reg_def R31_H ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()->next()); + -+ // the instruction sequence of load_label is as below: -+ // auipc -+ // load -+ static bool check_load_pc_relative_data_dependency(address instr) { -+ address auipc = instr; -+ address load = auipc + instruction_size; ++// ---------------------------- ++// Float/Double Registers ++// ---------------------------- + -+ return extract_rd(load) == extract_rd(auipc) && -+ extract_rs1(load) == extract_rd(load); -+ } ++// Double Registers + -+ static bool is_movptr_at(address instr); -+ static bool is_li32_at(address instr); -+ static bool is_li64_at(address instr); -+ static bool is_pc_relative_at(address branch); -+ static bool is_load_pc_relative_at(address branch); ++// The rules of ADL require that double registers be defined in pairs. ++// Each pair must be two 32-bit values, but not necessarily a pair of ++// single float registers. In each pair, ADLC-assigned register numbers ++// must be adjacent, with the lower number even. Finally, when the ++// CPU stores such a register pair to memory, the word associated with ++// the lower ADLC-assigned number must be stored to the lower address. + -+ static bool is_call_at(address instr) { -+ if (is_jal_at(instr) || is_jalr_at(instr)) { -+ return true; -+ } -+ return false; -+ } -+ static bool is_lwu_to_zr(address instr); ++// RISCV has 32 floating-point registers. Each can store a single ++// or double precision floating-point value. + -+ inline bool is_nop(); -+ inline bool is_jump_or_nop(); -+ bool is_safepoint_poll(); -+ bool is_sigill_zombie_not_entrant(); -+ bool is_stop(); ++// for Java use float registers f0-f31 are always save on call whereas ++// the platform ABI treats f8-f9 and f18-f27 as callee save). 
Other ++// float registers are SOC as per the platform spec + -+ protected: -+ address addr_at(int offset) const { return address(this) + offset; } ++reg_def F0 ( SOC, SOC, Op_RegF, 0, f0->as_VMReg() ); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()->next() ); ++reg_def F1 ( SOC, SOC, Op_RegF, 1, f1->as_VMReg() ); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()->next() ); ++reg_def F2 ( SOC, SOC, Op_RegF, 2, f2->as_VMReg() ); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()->next() ); ++reg_def F3 ( SOC, SOC, Op_RegF, 3, f3->as_VMReg() ); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()->next() ); ++reg_def F4 ( SOC, SOC, Op_RegF, 4, f4->as_VMReg() ); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()->next() ); ++reg_def F5 ( SOC, SOC, Op_RegF, 5, f5->as_VMReg() ); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()->next() ); ++reg_def F6 ( SOC, SOC, Op_RegF, 6, f6->as_VMReg() ); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()->next() ); ++reg_def F7 ( SOC, SOC, Op_RegF, 7, f7->as_VMReg() ); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()->next() ); ++reg_def F8 ( SOC, SOE, Op_RegF, 8, f8->as_VMReg() ); ++reg_def F8_H ( SOC, SOE, Op_RegF, 8, f8->as_VMReg()->next() ); ++reg_def F9 ( SOC, SOE, Op_RegF, 9, f9->as_VMReg() ); ++reg_def F9_H ( SOC, SOE, Op_RegF, 9, f9->as_VMReg()->next() ); ++reg_def F10 ( SOC, SOC, Op_RegF, 10, f10->as_VMReg() ); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, f10->as_VMReg()->next() ); ++reg_def F11 ( SOC, SOC, Op_RegF, 11, f11->as_VMReg() ); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, f11->as_VMReg()->next() ); ++reg_def F12 ( SOC, SOC, Op_RegF, 12, f12->as_VMReg() ); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, f12->as_VMReg()->next() ); ++reg_def F13 ( SOC, SOC, Op_RegF, 13, f13->as_VMReg() ); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, f13->as_VMReg()->next() ); ++reg_def F14 ( SOC, SOC, Op_RegF, 14, f14->as_VMReg() ); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, f14->as_VMReg()->next() ); ++reg_def F15 ( SOC, SOC, Op_RegF, 15, f15->as_VMReg() ); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, f15->as_VMReg()->next() ); ++reg_def F16 ( SOC, SOC, Op_RegF, 16, f16->as_VMReg() ); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, f16->as_VMReg()->next() ); ++reg_def F17 ( SOC, SOC, Op_RegF, 17, f17->as_VMReg() ); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, f17->as_VMReg()->next() ); ++reg_def F18 ( SOC, SOE, Op_RegF, 18, f18->as_VMReg() ); ++reg_def F18_H ( SOC, SOE, Op_RegF, 18, f18->as_VMReg()->next() ); ++reg_def F19 ( SOC, SOE, Op_RegF, 19, f19->as_VMReg() ); ++reg_def F19_H ( SOC, SOE, Op_RegF, 19, f19->as_VMReg()->next() ); ++reg_def F20 ( SOC, SOE, Op_RegF, 20, f20->as_VMReg() ); ++reg_def F20_H ( SOC, SOE, Op_RegF, 20, f20->as_VMReg()->next() ); ++reg_def F21 ( SOC, SOE, Op_RegF, 21, f21->as_VMReg() ); ++reg_def F21_H ( SOC, SOE, Op_RegF, 21, f21->as_VMReg()->next() ); ++reg_def F22 ( SOC, SOE, Op_RegF, 22, f22->as_VMReg() ); ++reg_def F22_H ( SOC, SOE, Op_RegF, 22, f22->as_VMReg()->next() ); ++reg_def F23 ( SOC, SOE, Op_RegF, 23, f23->as_VMReg() ); ++reg_def F23_H ( SOC, SOE, Op_RegF, 23, f23->as_VMReg()->next() ); ++reg_def F24 ( SOC, SOE, Op_RegF, 24, f24->as_VMReg() ); ++reg_def F24_H ( SOC, SOE, Op_RegF, 24, f24->as_VMReg()->next() ); ++reg_def F25 ( SOC, SOE, Op_RegF, 25, f25->as_VMReg() ); ++reg_def F25_H ( SOC, SOE, Op_RegF, 25, f25->as_VMReg()->next() ); ++reg_def F26 ( SOC, SOE, Op_RegF, 26, f26->as_VMReg() ); ++reg_def F26_H ( SOC, SOE, Op_RegF, 26, f26->as_VMReg()->next() ); ++reg_def F27 ( SOC, SOE, Op_RegF, 27, f27->as_VMReg() ); 
++reg_def F27_H ( SOC, SOE, Op_RegF, 27, f27->as_VMReg()->next() ); ++reg_def F28 ( SOC, SOC, Op_RegF, 28, f28->as_VMReg() ); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, f28->as_VMReg()->next() ); ++reg_def F29 ( SOC, SOC, Op_RegF, 29, f29->as_VMReg() ); ++reg_def F29_H ( SOC, SOC, Op_RegF, 29, f29->as_VMReg()->next() ); ++reg_def F30 ( SOC, SOC, Op_RegF, 30, f30->as_VMReg() ); ++reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() ); ++reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() ); ++reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); + -+ jint int_at(int offset) const { return *(jint*) addr_at(offset); } -+ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++// ---------------------------- ++// Special Registers ++// ---------------------------- + -+ address ptr_at(int offset) const { return *(address*) addr_at(offset); } ++// On riscv, the physical flag register is missing, so we use t1 instead, ++// to bridge the RegFlag semantics in share/opto + -+ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++reg_def RFLAGS (SOC, SOC, Op_RegFlags, 6, x6->as_VMReg() ); + ++// Specify priority of register selection within phases of register ++// allocation. Highest priority is first. A useful heuristic is to ++// give registers a low priority when they are required by machine ++// instructions, like EAX and EDX on I486, and choose no-save registers ++// before save-on-call, & save-on-call before save-on-entry. Registers ++// which participate in fixed calling sequences should come last. ++// Registers which are used as pairs must fall on an even boundary. + -+ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; } -+ void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; } -+ void set_ptr_at (int offset, address ptr) { *(address*) addr_at(offset) = ptr; } -+ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; } ++alloc_class chunk0( ++ // volatiles ++ R7, R7_H, ++ R28, R28_H, ++ R29, R29_H, ++ R30, R30_H, ++ R31, R31_H, + -+ public: ++ // arg registers ++ R10, R10_H, ++ R11, R11_H, ++ R12, R12_H, ++ R13, R13_H, ++ R14, R14_H, ++ R15, R15_H, ++ R16, R16_H, ++ R17, R17_H, + -+ inline friend NativeInstruction* nativeInstruction_at(address addr); ++ // non-volatiles ++ R9, R9_H, ++ R18, R18_H, ++ R19, R19_H, ++ R20, R20_H, ++ R21, R21_H, ++ R22, R22_H, ++ R24, R24_H, ++ R25, R25_H, ++ R26, R26_H, + -+ static bool maybe_cpool_ref(address instr) { -+ return is_auipc_at(instr); -+ } ++ // non-allocatable registers ++ R23, R23_H, // java thread ++ R27, R27_H, // heapbase ++ R4, R4_H, // thread ++ R8, R8_H, // fp ++ R0, R0_H, // zero ++ R1, R1_H, // ra ++ R2, R2_H, // sp ++ R3, R3_H, // gp ++); + -+ bool is_membar() { -+ return (uint_at(0) & 0x7f) == 0b1111 && extract_funct3(addr_at(0)) == 0; -+ } -+}; ++alloc_class chunk1( + -+inline NativeInstruction* nativeInstruction_at(address addr) { -+ return (NativeInstruction*)addr; -+} ++ // no save ++ F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H, + -+// The natural type of an RISCV instruction is uint32_t -+inline NativeInstruction* nativeInstruction_at(uint32_t *addr) { -+ return (NativeInstruction*)addr; -+} ++ // arg registers ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, + -+inline NativeCall* nativeCall_at(address addr); -+// The NativeCall is an abstraction for 
accessing/manipulating native -+// call instructions (used to manipulate inline caches, primitive & -+// DSO calls, etc.). ++ // non-volatiles ++ F8, F8_H, ++ F9, F9_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++); + -+class NativeCall: public NativeInstruction { -+ public: -+ enum RISCV_specific_constants { -+ instruction_size = 4, -+ instruction_offset = 0, -+ displacement_offset = 0, -+ return_address_offset = 4 -+ }; ++alloc_class chunk2(RFLAGS); + -+ address instruction_address() const { return addr_at(instruction_offset); } -+ address next_instruction_address() const { return addr_at(return_address_offset); } -+ address return_address() const { return addr_at(return_address_offset); } -+ address destination() const; ++//----------Architecture Description Register Classes-------------------------- ++// Several register classes are automatically defined based upon information in ++// this architecture description. ++// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) ++// 2) reg_class compiler_method_reg ( /* as def'd in frame section */ ) ++// 2) reg_class interpreter_method_reg ( /* as def'd in frame section */ ) ++// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) ++// + -+ void set_destination(address dest) { -+ assert(is_jal(), "Should be jal instruction!"); -+ intptr_t offset = (intptr_t)(dest - instruction_address()); -+ assert((offset & 0x1) == 0, "bad alignment"); -+ assert(is_imm_in_range(offset, 20, 1), "encoding constraint"); -+ unsigned int insn = 0b1101111; // jal -+ address pInsn = (address)(&insn); -+ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); -+ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); -+ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); -+ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); -+ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra -+ set_int_at(displacement_offset, insn); -+ } ++// Class for all 32 bit general purpose registers ++reg_class all_reg32( ++ R0, ++ R1, ++ R2, ++ R3, ++ R4, ++ R7, ++ R8, ++ R9, ++ R10, ++ R11, ++ R12, ++ R13, ++ R14, ++ R15, ++ R16, ++ R17, ++ R18, ++ R19, ++ R20, ++ R21, ++ R22, ++ R23, ++ R24, ++ R25, ++ R26, ++ R27, ++ R28, ++ R29, ++ R30, ++ R31 ++); + -+ void verify_alignment() {} // do nothing on riscv -+ void verify(); -+ void print(); ++// Class for any 32 bit integer registers (excluding zr) ++reg_class any_reg32 %{ ++ return _ANY_REG32_mask; ++%} + -+ // Creation -+ inline friend NativeCall* nativeCall_at(address addr); -+ inline friend NativeCall* nativeCall_before(address return_address); ++// Singleton class for R10 int register ++reg_class int_r10_reg(R10); + -+ static bool is_call_before(address return_address) { -+ return is_call_at(return_address - NativeCall::return_address_offset); -+ } ++// Singleton class for R12 int register ++reg_class int_r12_reg(R12); + -+ // MT-safe patching of a call instruction. -+ static void insert(address code_pos, address entry); ++// Singleton class for R13 int register ++reg_class int_r13_reg(R13); + -+ static void replace_mt_safe(address instr_addr, address code_buffer); ++// Singleton class for R14 int register ++reg_class int_r14_reg(R14); + -+ // Similar to replace_mt_safe, but just changes the destination. The -+ // important thing is that free-running threads are able to execute -+ // this call instruction at all times. 
If the call is an immediate BL -+ // instruction we can simply rely on atomicity of 32-bit writes to -+ // make sure other threads will see no intermediate states. ++// Class for all long integer registers ++reg_class all_reg( ++ R0, R0_H, ++ R1, R1_H, ++ R2, R2_H, ++ R3, R3_H, ++ R4, R4_H, ++ R7, R7_H, ++ R8, R8_H, ++ R9, R9_H, ++ R10, R10_H, ++ R11, R11_H, ++ R12, R12_H, ++ R13, R13_H, ++ R14, R14_H, ++ R15, R15_H, ++ R16, R16_H, ++ R17, R17_H, ++ R18, R18_H, ++ R19, R19_H, ++ R20, R20_H, ++ R21, R21_H, ++ R22, R22_H, ++ R23, R23_H, ++ R24, R24_H, ++ R25, R25_H, ++ R26, R26_H, ++ R27, R27_H, ++ R28, R28_H, ++ R29, R29_H, ++ R30, R30_H, ++ R31, R31_H ++); + -+ // We cannot rely on locks here, since the free-running threads must run at -+ // full speed. -+ // -+ // Used in the runtime linkage of calls; see class CompiledIC. -+ // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) ++// Class for all long integer registers (excluding zr) ++reg_class any_reg %{ ++ return _ANY_REG_mask; ++%} + -+ // The parameter assert_lock disables the assertion during code generation. -+ void set_destination_mt_safe(address dest, bool assert_lock = true); ++// Class for non-allocatable 32 bit registers ++reg_class non_allocatable_reg32( ++ R0, // zr ++ R1, // ra ++ R2, // sp ++ R3, // gp ++ R4, // tp ++ R23 // java thread ++); + -+ address get_trampoline(); -+}; ++// Class for non-allocatable 64 bit registers ++reg_class non_allocatable_reg( ++ R0, R0_H, // zr ++ R1, R1_H, // ra ++ R2, R2_H, // sp ++ R3, R3_H, // gp ++ R4, R4_H, // tp ++ R23, R23_H // java thread ++); + -+inline NativeCall* nativeCall_at(address addr) { -+ assert_cond(addr != NULL); -+ NativeCall* call = (NativeCall*)(addr - NativeCall::instruction_offset); -+#ifdef ASSERT -+ call->verify(); -+#endif -+ return call; -+} ++reg_class no_special_reg32 %{ ++ return _NO_SPECIAL_REG32_mask; ++%} + -+inline NativeCall* nativeCall_before(address return_address) { -+ assert_cond(return_address != NULL); -+ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); -+#ifdef ASSERT -+ call->verify(); -+#endif -+ return call; -+} ++reg_class no_special_reg %{ ++ return _NO_SPECIAL_REG_mask; ++%} + -+// An interface for accessing/manipulating native mov reg, imm instructions. -+// (used to manipulate inlined 64-bit data calls, etc.) -+class NativeMovConstReg: public NativeInstruction { -+ public: -+ enum RISCV_specific_constants { -+ movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). -+ movptr_with_offset_instruction_size = 5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset(). -+ load_pc_relative_instruction_size = 2 * NativeInstruction::instruction_size, // auipc, ld -+ instruction_offset = 0, -+ displacement_offset = 0 -+ }; ++reg_class ptr_reg %{ ++ return _PTR_REG_mask; ++%} + -+ address instruction_address() const { return addr_at(instruction_offset); } -+ address next_instruction_address() const { -+ // if the instruction at 5 * instruction_size is addi, -+ // it means a lui + addi + slli + addi + slli + addi instruction sequence, -+ // and the next instruction address should be addr_at(6 * instruction_size). 
-+ // However, when the instruction at 5 * instruction_size isn't addi, -+ // the next instruction address should be addr_at(5 * instruction_size) -+ if (nativeInstruction_at(instruction_address())->is_movptr()) { -+ if (is_addi_at(addr_at(movptr_with_offset_instruction_size))) { -+ // Assume: lui, addi, slli, addi, slli, addi -+ return addr_at(movptr_instruction_size); -+ } else { -+ // Assume: lui, addi, slli, addi, slli -+ return addr_at(movptr_with_offset_instruction_size); -+ } -+ } else if (is_load_pc_relative_at(instruction_address())) { -+ // Assume: auipc, ld -+ return addr_at(load_pc_relative_instruction_size); -+ } -+ guarantee(false, "Unknown instruction in NativeMovConstReg"); -+ return NULL; -+ } ++reg_class no_special_ptr_reg %{ ++ return _NO_SPECIAL_PTR_REG_mask; ++%} + -+ intptr_t data() const; -+ void set_data(intptr_t x); ++// Class for 64 bit register r10 ++reg_class r10_reg( ++ R10, R10_H ++); + -+ void flush() { -+ if (!maybe_cpool_ref(instruction_address())) { -+ ICache::invalidate_range(instruction_address(), movptr_instruction_size); -+ } -+ } ++// Class for 64 bit register r11 ++reg_class r11_reg( ++ R11, R11_H ++); + -+ void verify(); -+ void print(); ++// Class for 64 bit register r12 ++reg_class r12_reg( ++ R12, R12_H ++); + -+ // Creation -+ inline friend NativeMovConstReg* nativeMovConstReg_at(address addr); -+ inline friend NativeMovConstReg* nativeMovConstReg_before(address addr); -+}; ++// Class for 64 bit register r13 ++reg_class r13_reg( ++ R13, R13_H ++); + -+inline NativeMovConstReg* nativeMovConstReg_at(address addr) { -+ assert_cond(addr != NULL); -+ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_offset); -+#ifdef ASSERT -+ test->verify(); -+#endif -+ return test; -+} ++// Class for 64 bit register r14 ++reg_class r14_reg( ++ R14, R14_H ++); + -+inline NativeMovConstReg* nativeMovConstReg_before(address addr) { -+ assert_cond(addr != NULL); -+ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); -+#ifdef ASSERT -+ test->verify(); -+#endif -+ return test; -+} ++// Class for 64 bit register r15 ++reg_class r15_reg( ++ R15, R15_H ++); + -+// RISCV should not use C1 runtime patching, so just leave NativeMovRegMem Unimplemented. 
-+class NativeMovRegMem: public NativeInstruction { -+ public: -+ int instruction_start() const { -+ Unimplemented(); -+ return 0; -+ } ++// Class for 64 bit register r16 ++reg_class r16_reg( ++ R16, R16_H ++); + -+ address instruction_address() const { -+ Unimplemented(); -+ return NULL; -+ } ++// Class for method register ++reg_class method_reg( ++ R31, R31_H ++); + -+ int num_bytes_to_end_of_patch() const { -+ Unimplemented(); -+ return 0; -+ } ++// Class for heapbase register ++reg_class heapbase_reg( ++ R27, R27_H ++); + -+ int offset() const; ++// Class for java thread register ++reg_class java_thread_reg( ++ R23, R23_H ++); + -+ void set_offset(int x); ++reg_class r28_reg( ++ R28, R28_H ++); + -+ void add_offset_in_bytes(int add_offset) { Unimplemented(); } ++reg_class r29_reg( ++ R29, R29_H ++); + -+ void verify(); -+ void print(); ++reg_class r30_reg( ++ R30, R30_H ++); + -+ private: -+ inline friend NativeMovRegMem* nativeMovRegMem_at (address addr); -+}; ++// Class for zero registesr ++reg_class zr_reg( ++ R0, R0_H ++); + -+inline NativeMovRegMem* nativeMovRegMem_at (address addr) { -+ Unimplemented(); -+ return NULL; -+} ++// Class for thread register ++reg_class thread_reg( ++ R4, R4_H ++); + -+class NativeJump: public NativeInstruction { -+ public: -+ enum RISCV_specific_constants { -+ instruction_size = NativeInstruction::instruction_size, -+ instruction_offset = 0, -+ data_offset = 0, -+ next_instruction_offset = NativeInstruction::instruction_size -+ }; ++// Class for frame pointer register ++reg_class fp_reg( ++ R8, R8_H ++); + -+ address instruction_address() const { return addr_at(instruction_offset); } -+ address next_instruction_address() const { return addr_at(instruction_size); } -+ address jump_destination() const; -+ void set_jump_destination(address dest); ++// Class for link register ++reg_class ra_reg( ++ R1, R1_H ++); + -+ // Creation -+ inline friend NativeJump* nativeJump_at(address address); ++// Class for long sp register ++reg_class sp_reg( ++ R2, R2_H ++); + -+ void verify(); ++// Class for all float registers ++reg_class float_reg( ++ F0, ++ F1, ++ F2, ++ F3, ++ F4, ++ F5, ++ F6, ++ F7, ++ F8, ++ F9, ++ F10, ++ F11, ++ F12, ++ F13, ++ F14, ++ F15, ++ F16, ++ F17, ++ F18, ++ F19, ++ F20, ++ F21, ++ F22, ++ F23, ++ F24, ++ F25, ++ F26, ++ F27, ++ F28, ++ F29, ++ F30, ++ F31 ++); + -+ // Insertion of native jump instruction -+ static void insert(address code_pos, address entry); -+ // MT-safe insertion of native jump at verified method entry -+ static void check_verified_entry_alignment(address entry, address verified_entry); -+ static void patch_verified_entry(address entry, address verified_entry, address dest); -+}; ++// Double precision float registers have virtual `high halves' that ++// are needed by the allocator. 
++// Class for all double registers ++reg_class double_reg( ++ F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H ++); + -+inline NativeJump* nativeJump_at(address addr) { -+ NativeJump* jump = (NativeJump*)(addr - NativeJump::instruction_offset); -+#ifdef ASSERT -+ jump->verify(); -+#endif -+ return jump; -+} ++// Class for 64 bit register f0 ++reg_class f0_reg( ++ F0, F0_H ++); + -+class NativeGeneralJump: public NativeJump { -+public: -+ enum RISCV_specific_constants { -+ instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr -+ instruction_offset = 0, -+ data_offset = 0, -+ next_instruction_offset = 6 * NativeInstruction::instruction_size // lui, addi, slli, addi, slli, jalr -+ }; ++// Class for 64 bit register f1 ++reg_class f1_reg( ++ F1, F1_H ++); + -+ address jump_destination() const; ++// Class for 64 bit register f2 ++reg_class f2_reg( ++ F2, F2_H ++); + -+ static void insert_unconditional(address code_pos, address entry); -+ static void replace_mt_safe(address instr_addr, address code_buffer); -+}; ++// Class for 64 bit register f3 ++reg_class f3_reg( ++ F3, F3_H ++); + -+inline NativeGeneralJump* nativeGeneralJump_at(address addr) { -+ assert_cond(addr != NULL); -+ NativeGeneralJump* jump = (NativeGeneralJump*)(addr); -+ debug_only(jump->verify();) -+ return jump; -+} ++// class for condition codes ++reg_class reg_flags(RFLAGS); ++%} + -+class NativeIllegalInstruction: public NativeInstruction { -+ public: -+ // Insert illegal opcode as specific address -+ static void insert(address code_pos); -+}; ++//----------DEFINITION BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def ( , ); ++// Generated Code in ad_.hpp ++// #define () ++// // value == ++// Generated code in ad_.cpp adlc_verification() ++// assert( == , "Expect () to equal "); ++// + -+inline bool NativeInstruction::is_nop() { -+ uint32_t insn = *(uint32_t*)addr_at(0); -+ return insn == 0x13; -+} ++// we follow the ppc-aix port in using a simple cost model which ranks ++// register operations as cheap, memory ops as more expensive and ++// branches as most expensive. the first two have a low as well as a ++// normal cost. huge cost appears to be a way of saying don't do ++// something + -+inline bool NativeInstruction::is_jump_or_nop() { -+ return is_nop() || is_jump(); -+} ++definitions %{ ++ // The default cost (of a register move instruction). 
++ int_def DEFAULT_COST ( 100, 100); ++ int_def ALU_COST ( 100, 1 * DEFAULT_COST); // unknown, const, arith, shift, slt, ++ // multi, auipc, nop, logical, move ++ int_def LOAD_COST ( 300, 3 * DEFAULT_COST); // load, fpload ++ int_def STORE_COST ( 100, 1 * DEFAULT_COST); // store, fpstore ++ int_def XFER_COST ( 300, 3 * DEFAULT_COST); // mfc, mtc, fcvt, fmove, fcmp ++ int_def BRANCH_COST ( 200, 2 * DEFAULT_COST); // branch, jmp, call ++ int_def IMUL_COST ( 1000, 10 * DEFAULT_COST); // imul ++ int_def IDIVSI_COST ( 3400, 34 * DEFAULT_COST); // idivdi ++ int_def IDIVDI_COST ( 6600, 66 * DEFAULT_COST); // idivsi ++ int_def FMUL_SINGLE_COST ( 500, 5 * DEFAULT_COST); // fadd, fmul, fmadd ++ int_def FMUL_DOUBLE_COST ( 700, 7 * DEFAULT_COST); // fadd, fmul, fmadd ++ int_def FDIV_COST ( 2000, 20 * DEFAULT_COST); // fdiv ++ int_def FSQRT_COST ( 2500, 25 * DEFAULT_COST); // fsqrt ++ int_def VOLATILE_REF_COST ( 1000, 10 * DEFAULT_COST); ++%} + -+// Call trampoline stubs. -+class NativeCallTrampolineStub : public NativeInstruction { -+ public: + -+ enum RISCV_specific_constants { -+ // Refer to function emit_trampoline_stub. -+ instruction_size = 3 * NativeInstruction::instruction_size + wordSize, // auipc + ld + jr + target address -+ data_offset = 3 * NativeInstruction::instruction_size, // auipc + ld + jr -+ }; + -+ address destination(nmethod *nm = NULL) const; -+ void set_destination(address new_destination); -+ ptrdiff_t destination_offset() const; -+}; ++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description + -+inline bool is_NativeCallTrampolineStub_at(address addr) { -+ // Ensure that the stub is exactly -+ // ld t0, L--->auipc + ld -+ // jr t0 -+ // L: ++source_hpp %{ + -+ // judge inst + register + imm -+ // 1). check the instructions: auipc + ld + jalr -+ // 2). check if auipc[11:7] == t0 and ld[11:7] == t0 and ld[19:15] == t0 && jr[19:15] == t0 -+ // 3). 
check if the offset in ld[31:20] equals the data_offset -+ assert_cond(addr != NULL); -+ const int instr_size = NativeInstruction::instruction_size; -+ if (NativeInstruction::is_auipc_at(addr) && -+ NativeInstruction::is_ld_at(addr + instr_size) && -+ NativeInstruction::is_jalr_at(addr + 2 * instr_size) && -+ (NativeInstruction::extract_rd(addr) == x5) && -+ (NativeInstruction::extract_rd(addr + instr_size) == x5) && -+ (NativeInstruction::extract_rs1(addr + instr_size) == x5) && -+ (NativeInstruction::extract_rs1(addr + 2 * instr_size) == x5) && -+ (Assembler::extract(((unsigned*)addr)[1], 31, 20) == NativeCallTrampolineStub::data_offset)) { -+ return true; -+ } -+ return false; -+} ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "opto/addnode.hpp" ++#include "opto/convertnode.hpp" + -+inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { -+ assert_cond(addr != NULL); -+ assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found"); -+ return (NativeCallTrampolineStub*)addr; -+} ++extern RegMask _ANY_REG32_mask; ++extern RegMask _ANY_REG_mask; ++extern RegMask _PTR_REG_mask; ++extern RegMask _NO_SPECIAL_REG32_mask; ++extern RegMask _NO_SPECIAL_REG_mask; ++extern RegMask _NO_SPECIAL_PTR_REG_mask; + -+class NativeMembar : public NativeInstruction { -+public: -+ uint32_t get_kind(); -+ void set_kind(uint32_t order_kind); -+}; ++class CallStubImpl { + -+inline NativeMembar *NativeMembar_at(address addr) { -+ assert_cond(addr != NULL); -+ assert(nativeInstruction_at(addr)->is_membar(), "no membar found"); -+ return (NativeMembar*)addr; -+} ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- + -+class NativeFenceI : public NativeInstruction { -+public: -+ static inline int instruction_size() { -+ // 2 for fence.i + fence -+ return (UseConservativeFence ? 2 : 1) * NativeInstruction::instruction_size; ++ public: ++ // Size of call trampoline stub. ++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform + } +}; + -+#endif // CPU_RISCV_NATIVEINST_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.cpp b/src/hotspot/cpu/riscv/registerMap_riscv.cpp -new file mode 100644 -index 00000000000..26c1edc36ff ---- /dev/null -+++ b/src/hotspot/cpu/riscv/registerMap_riscv.cpp -@@ -0,0 +1,45 @@ -+/* -+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). 
-+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++class HandlerImpl { + -+#include "precompiled.hpp" -+#include "runtime/registerMap.hpp" -+#include "vmreg_riscv.inline.hpp" ++ public: + -+address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const { -+ if (base_reg->is_VectorRegister()) { -+ assert(base_reg->is_concrete(), "must pass base reg"); -+ int base_reg_enc = (base_reg->value() - ConcreteRegisterImpl::max_fpr) / -+ VectorRegisterImpl::max_slots_per_register; -+ intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size; -+ address base_location = location(base_reg); -+ if (base_location != NULL) { -+ return base_location + offset_in_bytes; -+ } else { -+ return NULL; -+ } -+ } else { -+ return location(base_reg->next(slot_idx)); ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ return MacroAssembler::far_branch_size(); + } -+} -diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp -new file mode 100644 -index 00000000000..f34349811a9 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp -@@ -0,0 +1,43 @@ -+/* -+ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_REGISTERMAP_RISCV_HPP -+#define CPU_RISCV_REGISTERMAP_RISCV_HPP ++ static uint size_deopt_handler() { ++ // count auipc + far branch ++ return NativeInstruction::instruction_size + MacroAssembler::far_branch_size(); ++ } ++}; + -+// machine-dependent implemention for register maps -+ friend class frame; ++bool is_CAS(int opcode, bool maybe_volatile); + -+ private: -+ // This is the hook for finding a register in an "well-known" location, -+ // such as a register block of a predetermined format. 
-+ address pd_location(VMReg reg) const { return NULL; } -+ address pd_location(VMReg base_reg, int slot_idx) const; ++// predicate controlling translation of CompareAndSwapX ++bool needs_acquiring_load_reserved(const Node *load); + -+ // no PD state to clear or copy: -+ void pd_clear() {} -+ void pd_initialize() {} -+ void pd_initialize_from(const RegisterMap* map) {} ++// predicate controlling translation of StoreCM ++bool unnecessary_storestore(const Node *storecm); + -+#endif // CPU_RISCV_REGISTERMAP_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp -new file mode 100644 -index 00000000000..f8116e9df8c ---- /dev/null -+++ b/src/hotspot/cpu/riscv/register_riscv.cpp -@@ -0,0 +1,73 @@ -+/* -+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++// predicate controlling addressing modes ++bool size_fits_all_mem_uses(AddPNode* addp, int shift); ++%} + -+#include "precompiled.hpp" -+#include "register_riscv.hpp" ++source %{ + -+REGISTER_IMPL_DEFINITION(Register, RegisterImpl, RegisterImpl::number_of_registers); -+REGISTER_IMPL_DEFINITION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); -+REGISTER_IMPL_DEFINITION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); ++// Derived RegMask with conditionally allocatable registers + -+const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * -+ RegisterImpl::max_slots_per_register; ++RegMask _ANY_REG32_mask; ++RegMask _ANY_REG_mask; ++RegMask _PTR_REG_mask; ++RegMask _NO_SPECIAL_REG32_mask; ++RegMask _NO_SPECIAL_REG_mask; ++RegMask _NO_SPECIAL_PTR_REG_mask; + -+const int ConcreteRegisterImpl::max_fpr = -+ ConcreteRegisterImpl::max_gpr + -+ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; ++void reg_mask_init() { + -+const int ConcreteRegisterImpl::max_vpr = -+ ConcreteRegisterImpl::max_fpr + -+ VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; ++ _ANY_REG32_mask = _ALL_REG32_mask; ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg())); + ++ _ANY_REG_mask = _ALL_REG_mask; ++ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask); + -+const char* RegisterImpl::name() const { -+ static const char *const names[number_of_registers] = { -+ "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", -+ "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", -+ "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals", -+ "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod" -+ }; -+ return is_valid() ? names[encoding()] : "noreg"; -+} ++ _PTR_REG_mask = _ALL_REG_mask; ++ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask); + -+const char* FloatRegisterImpl::name() const { -+ static const char *const names[number_of_registers] = { -+ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", -+ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", -+ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", -+ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" -+ }; -+ return is_valid() ? names[encoding()] : "noreg"; -+} ++ _NO_SPECIAL_REG32_mask = _ALL_REG32_mask; ++ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask); + -+const char* VectorRegisterImpl::name() const { -+ static const char *const names[number_of_registers] = { -+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", -+ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", -+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", -+ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" -+ }; -+ return is_valid() ? names[encoding()] : "noreg"; -+} -diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp -new file mode 100644 -index 00000000000..a9200cac647 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/register_riscv.hpp -@@ -0,0 +1,324 @@ -+/* -+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ _NO_SPECIAL_REG_mask = _ALL_REG_mask; ++ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); + -+#ifndef CPU_RISCV_REGISTER_RISCV_HPP -+#define CPU_RISCV_REGISTER_RISCV_HPP ++ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask; ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); + -+#include "asm/register.hpp" ++ // x27 is not allocatable when compressed oops is on ++ if (UseCompressedOops) { ++ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg())); ++ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); ++ } + -+#define CSR_FFLAGS 0x001 // Floating-Point Accrued Exceptions. -+#define CSR_FRM 0x002 // Floating-Point Dynamic Rounding Mode. -+#define CSR_FCSR 0x003 // Floating-Point Control and Status Register (frm + fflags). -+#define CSR_VSTART 0x008 // Vector start position -+#define CSR_VXSAT 0x009 // Fixed-Point Saturate Flag -+#define CSR_VXRM 0x00A // Fixed-Point Rounding Mode -+#define CSR_VCSR 0x00F // Vector control and status register -+#define CSR_VL 0xC20 // Vector length -+#define CSR_VTYPE 0xC21 // Vector data type register -+#define CSR_VLENB 0xC22 // VLEN/8 (vector register length in bytes) -+#define CSR_CYCLE 0xc00 // Cycle counter for RDCYCLE instruction. -+#define CSR_TIME 0xc01 // Timer for RDTIME instruction. -+#define CSR_INSTERT 0xc02 // Instructions-retired counter for RDINSTRET instruction. ++ // x8 is not allocatable when PreserveFramePointer is on ++ if (PreserveFramePointer) { ++ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg())); ++ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask); ++ } ++} + -+class VMRegImpl; -+typedef VMRegImpl* VMReg; ++// is_CAS(int opcode, bool maybe_volatile) ++// ++// return true if opcode is one of the possible CompareAndSwapX ++// values otherwise false. 
++bool is_CAS(int opcode, bool maybe_volatile) ++{ ++ switch (opcode) { ++ // We handle these ++ case Op_CompareAndSwapI: ++ case Op_CompareAndSwapL: ++ case Op_CompareAndSwapP: ++ case Op_CompareAndSwapN: ++#if INCLUDE_SHENANDOAHGC ++ case Op_ShenandoahCompareAndSwapP: ++ case Op_ShenandoahCompareAndSwapN: ++#endif ++ case Op_CompareAndSwapB: ++ case Op_CompareAndSwapS: ++ case Op_GetAndSetI: ++ case Op_GetAndSetL: ++ case Op_GetAndSetP: ++ case Op_GetAndSetN: ++ case Op_GetAndAddI: ++ case Op_GetAndAddL: ++ return true; ++ case Op_CompareAndExchangeI: ++ case Op_CompareAndExchangeN: ++ case Op_CompareAndExchangeB: ++ case Op_CompareAndExchangeS: ++ case Op_CompareAndExchangeL: ++ case Op_CompareAndExchangeP: ++ case Op_WeakCompareAndSwapB: ++ case Op_WeakCompareAndSwapS: ++ case Op_WeakCompareAndSwapI: ++ case Op_WeakCompareAndSwapL: ++ case Op_WeakCompareAndSwapP: ++ case Op_WeakCompareAndSwapN: ++ return maybe_volatile; ++ default: ++ return false; ++ } ++} + -+// Use Register as shortcut -+class RegisterImpl; -+typedef const RegisterImpl* Register; ++// predicate controlling translation of CAS ++// ++// returns true if CAS needs to use an acquiring load otherwise false ++bool needs_acquiring_load_reserved(const Node *n) ++{ ++ assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap"); + -+inline constexpr Register as_Register(int encoding); ++ LoadStoreNode* ldst = n->as_LoadStore(); ++ if (n != NULL && is_CAS(n->Opcode(), false)) { ++ assert(ldst != NULL && ldst->trailing_membar() != NULL, "expected trailing membar"); ++ } else { ++ return ldst != NULL && ldst->trailing_membar() != NULL; ++ } ++ // so we can just return true here ++ return true; ++} + -+class RegisterImpl: public AbstractRegisterImpl { -+ static constexpr Register first(); ++// predicate controlling translation of StoreCM ++// ++// returns true if a StoreStore must precede the card write otherwise ++// false + -+ public: -+ enum { -+ number_of_registers = 32, -+ max_slots_per_register = 2, ++bool unnecessary_storestore(const Node *storecm) ++{ ++ assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); + -+ // integer registers x8 - x15 and floating-point registers f8 - f15 are allocatable -+ // for compressed instructions. See Table 17.2 in spec. -+ compressed_register_base = 8, -+ compressed_register_top = 15, -+ }; ++ // we need to generate a dmb ishst between an object put and the ++ // associated card mark when we are using CMS without conditional ++ // card marking + -+ // derived registers, offsets, and addresses -+ const Register successor() const { return this + 1; } ++ if (UseConcMarkSweepGC && !UseCondCardMark) { ++ return false; ++ } + -+ // construction -+ inline friend constexpr Register as_Register(int encoding); ++ // a storestore is unnecesary in all other cases + -+ VMReg as_VMReg() const; ++ return true; ++} + -+ // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } -+ int encoding_nocheck() const { return this - first(); } -+ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } -+ const char* name() const; ++#define __ _masm. 
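
(Editorial aside, not part of the patch.) The predicates above decide when C2 may fold acquire/release ordering into the LR/SC pair of a CompareAndSwap instead of surrounding it with separate fences. A rough sketch of the two shapes for a 32-bit CAS, assuming the address in a0, the expected value in a1 and the new value in a2 — the register choices, labels and exact sequence are illustrative only, not what the port's MacroAssembler actually emits:

    # conservative shape: relaxed LR/SC loop, then an explicit acquire fence
    1:  lr.w     t0, (a0)        # load-reserve the current value
        bne      t0, a1, 2f      # not the expected value -> fail
        sc.w     t1, a2, (a0)    # try to store the new value
        bnez     t1, 1b          # sc.w writes non-zero on failure, so retry
    2:  fence    r, rw           # separate acquire barrier

    # shape enabled when needs_acquiring_load_reserved() holds: the aq/rl bits
    # on lr/sc carry the ordering, so the trailing fence can be dropped
    1:  lr.w.aq  t0, (a0)
        bne      t0, a1, 2f
        sc.w.rl  t1, a2, (a0)
        bnez     t1, 1b
    2:

unnecessary_storestore() plays the analogous role for card-table writes: per the code above, the StoreStore barrier in front of a StoreCM is only kept for CMS without conditional card marking and is elided in every other configuration.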
+ -+ // for rvc -+ int compressed_encoding() const { -+ assert(is_compressed_valid(), "invalid compressed register"); -+ return encoding() - compressed_register_base; -+ } ++// advance declarations for helper functions to convert register ++// indices to register objects + -+ int compressed_encoding_nocheck() const { -+ return encoding_nocheck() - compressed_register_base; -+ } ++// the ad file has to provide implementations of certain methods ++// expected by the generic code ++// ++// REQUIRED FUNCTIONALITY + -+ bool is_compressed_valid() const { -+ return encoding_nocheck() >= compressed_register_base && -+ encoding_nocheck() <= compressed_register_top; -+ } -+}; ++//============================================================================= + -+REGISTER_IMPL_DECLARATION(Register, RegisterImpl, RegisterImpl::number_of_registers); ++// !!!!! Special hack to get all types of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. + -+// The integer registers of the RISCV architecture ++int MachCallStaticJavaNode::ret_addr_offset() ++{ ++ // jal ++ return 1 * NativeInstruction::instruction_size; ++} + -+CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++int MachCallDynamicJavaNode::ret_addr_offset() ++{ ++ return 7 * NativeInstruction::instruction_size; // movptr, jal ++} + -+CONSTANT_REGISTER_DECLARATION(Register, x0, (0)); -+CONSTANT_REGISTER_DECLARATION(Register, x1, (1)); -+CONSTANT_REGISTER_DECLARATION(Register, x2, (2)); -+CONSTANT_REGISTER_DECLARATION(Register, x3, (3)); -+CONSTANT_REGISTER_DECLARATION(Register, x4, (4)); -+CONSTANT_REGISTER_DECLARATION(Register, x5, (5)); -+CONSTANT_REGISTER_DECLARATION(Register, x6, (6)); -+CONSTANT_REGISTER_DECLARATION(Register, x7, (7)); -+CONSTANT_REGISTER_DECLARATION(Register, x8, (8)); -+CONSTANT_REGISTER_DECLARATION(Register, x9, (9)); -+CONSTANT_REGISTER_DECLARATION(Register, x10, (10)); -+CONSTANT_REGISTER_DECLARATION(Register, x11, (11)); -+CONSTANT_REGISTER_DECLARATION(Register, x12, (12)); -+CONSTANT_REGISTER_DECLARATION(Register, x13, (13)); -+CONSTANT_REGISTER_DECLARATION(Register, x14, (14)); -+CONSTANT_REGISTER_DECLARATION(Register, x15, (15)); -+CONSTANT_REGISTER_DECLARATION(Register, x16, (16)); -+CONSTANT_REGISTER_DECLARATION(Register, x17, (17)); -+CONSTANT_REGISTER_DECLARATION(Register, x18, (18)); -+CONSTANT_REGISTER_DECLARATION(Register, x19, (19)); -+CONSTANT_REGISTER_DECLARATION(Register, x20, (20)); -+CONSTANT_REGISTER_DECLARATION(Register, x21, (21)); -+CONSTANT_REGISTER_DECLARATION(Register, x22, (22)); -+CONSTANT_REGISTER_DECLARATION(Register, x23, (23)); -+CONSTANT_REGISTER_DECLARATION(Register, x24, (24)); -+CONSTANT_REGISTER_DECLARATION(Register, x25, (25)); -+CONSTANT_REGISTER_DECLARATION(Register, x26, (26)); -+CONSTANT_REGISTER_DECLARATION(Register, x27, (27)); -+CONSTANT_REGISTER_DECLARATION(Register, x28, (28)); -+CONSTANT_REGISTER_DECLARATION(Register, x29, (29)); -+CONSTANT_REGISTER_DECLARATION(Register, x30, (30)); -+CONSTANT_REGISTER_DECLARATION(Register, x31, (31)); ++int MachCallRuntimeNode::ret_addr_offset() { ++ // for generated stubs the call will be ++ // jal(addr) ++ // or with far branches ++ // jal(trampoline_stub) ++ // for real runtime callouts it will be 11 instructions ++ // see riscv_enc_java_to_runtime ++ // la(t1, retaddr) -> auipc + addi ++ // la(t0, RuntimeAddress(addr)) -> lui + addi + slli + addi + slli + addi ++ // addi(sp, sp, -2 * wordSize) -> addi ++ // sd(t1, Address(sp, wordSize)) -> sd ++ // jalr(t0) -> jalr ++ 
CodeBlob *cb = CodeCache::find_blob(_entry_point); ++ if (cb != NULL) { ++ return 1 * NativeInstruction::instruction_size; ++ } else { ++ return 11 * NativeInstruction::instruction_size; ++ } ++} + -+// Use FloatRegister as shortcut -+class FloatRegisterImpl; -+typedef const FloatRegisterImpl* FloatRegister; ++// ++// Compute padding required for nodes which need alignment ++// + -+inline constexpr FloatRegister as_FloatRegister(int encoding); ++// With RVC a call instruction may get 2-byte aligned. ++// The address of the call instruction needs to be 4-byte aligned to ++// ensure that it does not span a cache line so that it can be patched. ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const ++{ ++ // to make sure the address of jal 4-byte aligned. ++ return align_up(current_offset, alignment_required()) - current_offset; ++} + -+// The implementation of floating point registers for the architecture -+class FloatRegisterImpl: public AbstractRegisterImpl { -+ static constexpr FloatRegister first(); ++// With RVC a call instruction may get 2-byte aligned. ++// The address of the call instruction needs to be 4-byte aligned to ++// ensure that it does not span a cache line so that it can be patched. ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const ++{ ++ // skip the movptr in MacroAssembler::ic_call(): ++ // lui + addi + slli + addi + slli + addi ++ // Though movptr() has already 4-byte aligned with or without RVC, ++ // We need to prevent from further changes by explicitly calculating the size. ++ const int movptr_size = 6 * NativeInstruction::instruction_size; ++ current_offset += movptr_size; ++ // to make sure the address of jal 4-byte aligned. ++ return align_up(current_offset, alignment_required()) - current_offset; ++} + -+ public: -+ enum { -+ number_of_registers = 32, -+ max_slots_per_register = 2, ++// Indicate if the safepoint node needs the polling page as an input + -+ // float registers in the range of [f8~f15] correspond to RVC. Please see Table 16.2 in spec. -+ compressed_register_base = 8, -+ compressed_register_top = 15, -+ }; ++// the shared code plants the oop data at the start of the generated ++// code for the safepoint node and that needs ot be at the load ++// instruction itself. so we cannot plant a mov of the safepoint poll ++// address followed by a load. setting this to true means the mov is ++// scheduled as a prior instruction. that's better for scheduling ++// anyway. 
+ -+ // construction -+ inline friend constexpr FloatRegister as_FloatRegister(int encoding); ++bool SafePointNode::needs_polling_address_input() ++{ ++ return true; ++} + -+ VMReg as_VMReg() const; ++//============================================================================= + -+ // derived registers, offsets, and addresses -+ FloatRegister successor() const { -+ return as_FloatRegister((encoding() + 1) % (unsigned)number_of_registers); -+ } ++#ifndef PRODUCT ++void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(st != NULL); ++ st->print("BREAKPOINT"); ++} ++#endif + -+ // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } -+ int encoding_nocheck() const { return this - first(); } -+ int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } -+ const char* name() const; ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ebreak(); ++} + -+ // for rvc -+ int compressed_encoding() const { -+ assert(is_compressed_valid(), "invalid compressed register"); -+ return encoding() - compressed_register_base; -+ } ++uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} + -+ int compressed_encoding_nocheck() const { -+ return encoding_nocheck() - compressed_register_base; ++//============================================================================= ++ ++#ifndef PRODUCT ++ void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { ++ st->print("nop \t# %d bytes pad for loops and calls", _count); + } ++#endif + -+ bool is_compressed_valid() const { -+ return encoding_nocheck() >= compressed_register_base && -+ encoding_nocheck() <= compressed_register_top; ++ void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. ++ for (int i = 0; i < _count; i++) { ++ __ nop(); ++ } + } -+}; + -+REGISTER_IMPL_DECLARATION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); ++ uint MachNopNode::size(PhaseRegAlloc*) const { ++ return _count * (UseRVC ? 
NativeInstruction::compressed_instruction_size : NativeInstruction::instruction_size); ++ } + -+// The float registers of the RISCV architecture ++//============================================================================= ++const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; + -+CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++int Compile::ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} + -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} + -+// Use VectorRegister as shortcut -+class VectorRegisterImpl; -+typedef const VectorRegisterImpl* VectorRegister; ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ // Empty encoding ++} + -+inline constexpr VectorRegister as_VectorRegister(int encoding); ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ return 0; ++} + -+// The implementation of vector registers for RVV -+class VectorRegisterImpl: public AbstractRegisterImpl { -+ static constexpr VectorRegister first(); ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ assert_cond(st != NULL); ++ st->print("-- \t// MachConstantBaseNode (empty encoding)"); ++} ++#endif + -+ public: -+ enum { -+ number_of_registers = 32, -+ max_slots_per_register = 4 -+ }; ++#ifndef PRODUCT ++void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(st != NULL && ra_ != NULL); ++ 
Compile* C = ra_->C; + -+ // construction -+ inline friend constexpr VectorRegister as_VectorRegister(int encoding); ++ int framesize = C->frame_slots() << LogBytesPerInt; + -+ VMReg as_VMReg() const; ++ if (C->need_stack_bang(framesize)) { ++ st->print("# stack bang size=%d\n\t", framesize); ++ } + -+ // derived registers, offsets, and addresses -+ VectorRegister successor() const { return this + 1; } ++ st->print("sd fp, [sp, #%d]\n\t", - 2 * wordSize); ++ st->print("sd ra, [sp, #%d]\n\t", - wordSize); ++ if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } ++ st->print("sub sp, sp, #%d\n\t", framesize); ++} ++#endif + -+ // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } -+ int encoding_nocheck() const { return this - first(); } -+ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } -+ const char* name() const; ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ assert_cond(ra_ != NULL); ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); + -+}; ++ // n.b. frame size includes space for return pc and fp ++ const int framesize = C->frame_size_in_bytes(); + -+REGISTER_IMPL_DECLARATION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); ++ // insert a nop at the start of the prolog so we can patch in a ++ // branch if we need to invalidate the method later ++ __ nop(); + -+// The vector registers of RVV -+CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1)); ++ assert_cond(C != NULL); + -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v0 , ( 0)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v1 , ( 1)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v2 , ( 2)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v3 , ( 3)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v4 , ( 4)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v5 , ( 5)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v6 , ( 6)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v7 , ( 7)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v8 , ( 8)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v9 , ( 9)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v10 , (10)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v11 , (11)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v12 , (12)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v13 , (13)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v14 , (14)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v15 , (15)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v16 , (16)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v17 , (17)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v18 , (18)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v19 , (19)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v20 , (20)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v21 , (21)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v22 , (22)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v23 , (23)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v24 , (24)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v25 , (25)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v26 , (26)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v27 , (27)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v28 , (28)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v29 , (29)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v30 , (30)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v31 , (31)); ++ int bangsize = 
C->bang_size_in_bytes(); ++ if (C->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } + ++ __ build_frame(framesize); + -+// Need to know the total number of registers of all sorts for SharedInfo. -+// Define a class that exports it. -+class ConcreteRegisterImpl : public AbstractRegisterImpl { -+ public: -+ enum { -+ // A big enough number for C2: all the registers plus flags -+ // This number must be large enough to cover REG_COUNT (defined by c2) registers. -+ // There is no requirement that any ordering here matches any ordering c2 gives -+ // it's optoregs. ++ if (VerifyStackAtCalls) { ++ Unimplemented(); ++ } + -+ number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + -+ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + -+ VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers) -+ }; ++ C->set_frame_complete(cbuf.insts_size()); + -+ // added to make it compile -+ static const int max_gpr; -+ static const int max_fpr; -+ static const int max_vpr; -+}; ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} + -+typedef AbstractRegSet RegSet; -+typedef AbstractRegSet FloatRegSet; -+typedef AbstractRegSet VectorRegSet; ++uint MachPrologNode::size(PhaseRegAlloc* ra_) const ++{ ++ assert_cond(ra_ != NULL); ++ return MachNode::size(ra_); // too many variables; just compute it ++ // the hard way ++} + -+#endif // CPU_RISCV_REGISTER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -new file mode 100644 -index 00000000000..228a64eae2c ---- /dev/null -+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -@@ -0,0 +1,113 @@ -+/* -+ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++int MachPrologNode::reloc() const ++{ ++ return 0; ++} + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "code/relocInfo.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/oop.inline.hpp" -+#include "runtime/safepoint.hpp" ++//============================================================================= + -+void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { -+ if (verify_only) { -+ return; -+ } ++#ifndef PRODUCT ++void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(st != NULL && ra_ != NULL); ++ Compile* C = ra_->C; ++ assert_cond(C != NULL); ++ int framesize = C->frame_size_in_bytes(); + -+ int bytes; ++ st->print("# pop frame %d\n\t", framesize); + -+ switch (type()) { -+ case relocInfo::oop_type: { -+ oop_Relocation *reloc = (oop_Relocation *)this; -+ // in movoop when BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate -+ if (NativeInstruction::is_load_pc_relative_at(addr())) { -+ address constptr = (address)code()->oop_addr_at(reloc->oop_index()); -+ bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); -+ assert(*(address*)constptr == x, "error in oop relocation"); -+ } else { -+ bytes = MacroAssembler::patch_oop(addr(), x); -+ } -+ break; -+ } -+ default: -+ bytes = MacroAssembler::pd_patch_instruction_size(addr(), x); -+ break; ++ if (framesize == 0) { ++ st->print("ld ra, [sp,#%d]\n\t", (2 * wordSize)); ++ st->print("ld fp, [sp,#%d]\n\t", (3 * wordSize)); ++ st->print("add sp, sp, #%d\n\t", (2 * wordSize)); ++ } else { ++ st->print("add sp, sp, #%d\n\t", framesize); ++ st->print("ld ra, [sp,#%d]\n\t", - 2 * wordSize); ++ st->print("ld fp, [sp,#%d]\n\t", - wordSize); + } -+ ICache::invalidate_range(addr(), bytes); -+} + -+address Relocation::pd_call_destination(address orig_addr) { -+ assert(is_call(), "should be an address instruction here"); -+ if (NativeCall::is_call_at(addr())) { -+ address trampoline = nativeCall_at(addr())->get_trampoline(); -+ if (trampoline != NULL) { -+ return nativeCallTrampolineStub_at(trampoline)->destination(); -+ } -+ } -+ if (orig_addr != NULL) { -+ // the extracted address from the instructions in address orig_addr -+ address new_addr = MacroAssembler::pd_call_destination(orig_addr); -+ // If call is branch to self, don't try to relocate it, just leave it -+ // as branch to self. This happens during code generation if the code -+ // buffer expands. It will be relocated to the trampoline above once -+ // code generation is complete. -+ new_addr = (new_addr == orig_addr) ? 
addr() : new_addr; -+ return new_addr; ++ if (do_polling() && C->is_method_compilation()) { ++ st->print("# touch polling page\n\t"); ++ st->print("li t0, #0x%lx\n\t", p2i(os::get_polling_page())); ++ st->print("ld zr, [t0]"); + } -+ return MacroAssembler::pd_call_destination(addr()); +} ++#endif + -+void Relocation::pd_set_call_destination(address x) { -+ assert(is_call(), "should be an address instruction here"); -+ if (NativeCall::is_call_at(addr())) { -+ address trampoline = nativeCall_at(addr())->get_trampoline(); -+ if (trampoline != NULL) { -+ nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false); -+ return; -+ } ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ assert_cond(ra_ != NULL); ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ assert_cond(C != NULL); ++ int framesize = C->frame_size_in_bytes(); ++ ++ __ remove_frame(framesize); ++ ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); + } -+ MacroAssembler::pd_patch_instruction_size(addr(), x); -+ address pd_call = pd_call_destination(addr()); -+ assert(pd_call == x, "fail in reloc"); -+} + -+address* Relocation::pd_address_in_code() { -+ assert(NativeCall::is_load_pc_relative_at(addr()), "Not the expected instruction sequence!"); -+ return (address*)(MacroAssembler::target_addr_for_insn(addr())); ++ if (do_polling() && C->is_method_compilation()) { ++ __ read_polling_page(t0, os::get_polling_page(), relocInfo::poll_return_type); ++ } +} + -+address Relocation::pd_get_address_from_code() { -+ return MacroAssembler::pd_call_destination(addr()); ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ assert_cond(ra_ != NULL); ++ // Variable size. Determine dynamically. ++ return MachNode::size(ra_); +} + -+void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { -+ if (NativeInstruction::maybe_cpool_ref(addr())) { -+ address old_addr = old_addr_for(addr(), src, dest); -+ MacroAssembler::pd_patch_instruction_size(addr(), MacroAssembler::target_addr_for_insn(old_addr)); -+ } ++int MachEpilogNode::reloc() const { ++ // Return number of relocatable values contained in this instruction. ++ return 1; // 1 for polling page. ++} ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); +} + -+void metadata_Relocation::pd_fix_value(address x) { ++// This method seems to be obsolete. It is declared in machnode.hpp ++// and defined in all *.ad files, but it is never called. Should we ++// get rid of it? ++int MachEpilogNode::safepoint_offset() const { ++ assert(do_polling(), "no return for this epilog node"); ++ return 4; +} -diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp -new file mode 100644 -index 00000000000..840ed935d88 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp -@@ -0,0 +1,44 @@ -+/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_RELOCINFO_RISCV_HPP -+#define CPU_RISCV_RELOCINFO_RISCV_HPP ++//============================================================================= + -+ // machine-dependent parts of class relocInfo -+ private: -+ enum { -+ // Relocations are byte-aligned. -+ offset_unit = 1, -+ // Must be at least 1 for RelocInfo::narrow_oop_in_const. -+ format_width = 1 -+ }; ++// Figure out which register class each belongs in: rc_int, rc_float or ++// rc_stack. ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; + -+ public: ++static enum RC rc_class(OptoReg::Name reg) { + -+ // This platform has no oops in the code that are not also -+ // listed in the oop section. -+ static bool mustIterateImmediateOopsInCode() { return false; } ++ if (reg == OptoReg::Bad) { ++ return rc_bad; ++ } + -+#endif // CPU_RISCV_RELOCINFO_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -new file mode 100644 -index 00000000000..588887e1d96 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -0,0 +1,10611 @@ -+// -+// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. 
-+// -+// ++ // we have 30 int registers * 2 halves ++ // (t0 and t1 are omitted) ++ int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2); ++ if (reg < slots_of_int_registers) { ++ return rc_int; ++ } + -+// RISCV Architecture Description File ++ // we have 32 float register * 2 halves ++ int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers; ++ if (reg < slots_of_int_registers + slots_of_float_registers) { ++ return rc_float; ++ } + -+//----------REGISTER DEFINITION BLOCK------------------------------------------ -+// This information is used by the matcher and the register allocator to -+// describe individual registers and classes of registers within the target -+// archtecture. ++ // Between float regs & stack is the flags regs. ++ assert(OptoReg::is_stack(reg), "blow up if spilling flags"); + -+register %{ -+//----------Architecture Description Register Definitions---------------------- -+// General Registers -+// "reg_def" name ( register save type, C convention save type, -+// ideal register type, encoding ); -+// Register Save Types: -+// -+// NS = No-Save: The register allocator assumes that these registers -+// can be used without saving upon entry to the method, & -+// that they do not need to be saved at call sites. -+// -+// SOC = Save-On-Call: The register allocator assumes that these registers -+// can be used without saving upon entry to the method, -+// but that they must be saved at call sites. -+// -+// SOE = Save-On-Entry: The register allocator assumes that these registers -+// must be saved before using them upon entry to the -+// method, but they do not need to be saved at call -+// sites. -+// -+// AS = Always-Save: The register allocator assumes that these registers -+// must be saved before using them upon entry to the -+// method, & that they must be saved at call sites. -+// -+// Ideal Register Type is used to determine how to save & restore a -+// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get -+// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. -+// -+// The encoding number is the actual bit-pattern placed into the opcodes. ++ return rc_stack; ++} + -+// We must define the 64 bit int registers in two 32 bit halves, the -+// real lower register and a virtual upper half register. upper halves -+// are used by the register allocator but are not actually supplied as -+// operands to memory ops. -+// -+// follow the C1 compiler in making registers -+// -+// x7, x9-x17, x27-x31 volatile (caller save) -+// x0-x4, x8, x23 system (no save, no allocate) -+// x5-x6 non-allocatable (so we can use them as temporary regs) ++uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const { ++ assert_cond(ra_ != NULL); ++ Compile* C = ra_->C; + -+// -+// as regards Java usage. we don't use any callee save registers -+// because this makes it difficult to de-optimise a frame (see comment -+// in x86 implementation of Deoptimization::unwind_callee_save_values) -+// ++ // Get registers to move. 
++ OptoReg::Name src_hi = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_lo = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_hi = ra_->get_reg_second(this); ++ OptoReg::Name dst_lo = ra_->get_reg_first(this); + -+// General Registers ++ enum RC src_hi_rc = rc_class(src_hi); ++ enum RC src_lo_rc = rc_class(src_lo); ++ enum RC dst_hi_rc = rc_class(dst_hi); ++ enum RC dst_lo_rc = rc_class(dst_lo); + -+reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr -+reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() ); -+reg_def R1 ( NS, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra -+reg_def R1_H ( NS, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); -+reg_def R2 ( NS, SOE, Op_RegI, 2, x2->as_VMReg() ); // sp -+reg_def R2_H ( NS, SOE, Op_RegI, 2, x2->as_VMReg()->next() ); -+reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp -+reg_def R3_H ( NS, NS, Op_RegI, 3, x3->as_VMReg()->next() ); -+reg_def R4 ( NS, NS, Op_RegI, 4, x4->as_VMReg() ); // tp -+reg_def R4_H ( NS, NS, Op_RegI, 4, x4->as_VMReg()->next() ); -+reg_def R7 ( SOC, SOC, Op_RegI, 7, x7->as_VMReg() ); -+reg_def R7_H ( SOC, SOC, Op_RegI, 7, x7->as_VMReg()->next() ); -+reg_def R8 ( NS, SOE, Op_RegI, 8, x8->as_VMReg() ); // fp -+reg_def R8_H ( NS, SOE, Op_RegI, 8, x8->as_VMReg()->next() ); -+reg_def R9 ( SOC, SOE, Op_RegI, 9, x9->as_VMReg() ); -+reg_def R9_H ( SOC, SOE, Op_RegI, 9, x9->as_VMReg()->next() ); -+reg_def R10 ( SOC, SOC, Op_RegI, 10, x10->as_VMReg() ); -+reg_def R10_H ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()->next()); -+reg_def R11 ( SOC, SOC, Op_RegI, 11, x11->as_VMReg() ); -+reg_def R11_H ( SOC, SOC, Op_RegI, 11, x11->as_VMReg()->next()); -+reg_def R12 ( SOC, SOC, Op_RegI, 12, x12->as_VMReg() ); -+reg_def R12_H ( SOC, SOC, Op_RegI, 12, x12->as_VMReg()->next()); -+reg_def R13 ( SOC, SOC, Op_RegI, 13, x13->as_VMReg() ); -+reg_def R13_H ( SOC, SOC, Op_RegI, 13, x13->as_VMReg()->next()); -+reg_def R14 ( SOC, SOC, Op_RegI, 14, x14->as_VMReg() ); -+reg_def R14_H ( SOC, SOC, Op_RegI, 14, x14->as_VMReg()->next()); -+reg_def R15 ( SOC, SOC, Op_RegI, 15, x15->as_VMReg() ); -+reg_def R15_H ( SOC, SOC, Op_RegI, 15, x15->as_VMReg()->next()); -+reg_def R16 ( SOC, SOC, Op_RegI, 16, x16->as_VMReg() ); -+reg_def R16_H ( SOC, SOC, Op_RegI, 16, x16->as_VMReg()->next()); -+reg_def R17 ( SOC, SOC, Op_RegI, 17, x17->as_VMReg() ); -+reg_def R17_H ( SOC, SOC, Op_RegI, 17, x17->as_VMReg()->next()); -+reg_def R18 ( SOC, SOE, Op_RegI, 18, x18->as_VMReg() ); -+reg_def R18_H ( SOC, SOE, Op_RegI, 18, x18->as_VMReg()->next()); -+reg_def R19 ( SOC, SOE, Op_RegI, 19, x19->as_VMReg() ); -+reg_def R19_H ( SOC, SOE, Op_RegI, 19, x19->as_VMReg()->next()); -+reg_def R20 ( SOC, SOE, Op_RegI, 20, x20->as_VMReg() ); // caller esp -+reg_def R20_H ( SOC, SOE, Op_RegI, 20, x20->as_VMReg()->next()); -+reg_def R21 ( SOC, SOE, Op_RegI, 21, x21->as_VMReg() ); -+reg_def R21_H ( SOC, SOE, Op_RegI, 21, x21->as_VMReg()->next()); -+reg_def R22 ( SOC, SOE, Op_RegI, 22, x22->as_VMReg() ); -+reg_def R22_H ( SOC, SOE, Op_RegI, 22, x22->as_VMReg()->next()); -+reg_def R23 ( NS, SOE, Op_RegI, 23, x23->as_VMReg() ); // java thread -+reg_def R23_H ( NS, SOE, Op_RegI, 23, x23->as_VMReg()->next()); -+reg_def R24 ( SOC, SOE, Op_RegI, 24, x24->as_VMReg() ); -+reg_def R24_H ( SOC, SOE, Op_RegI, 24, x24->as_VMReg()->next()); -+reg_def R25 ( SOC, SOE, Op_RegI, 25, x25->as_VMReg() ); -+reg_def R25_H ( SOC, SOE, Op_RegI, 25, x25->as_VMReg()->next()); -+reg_def R26 ( SOC, SOE, Op_RegI, 26, x26->as_VMReg() ); -+reg_def R26_H ( SOC, SOE, Op_RegI, 26, x26->as_VMReg()->next()); -+reg_def R27 ( SOC, 
SOE, Op_RegI, 27, x27->as_VMReg() ); // heapbase -+reg_def R27_H ( SOC, SOE, Op_RegI, 27, x27->as_VMReg()->next()); -+reg_def R28 ( SOC, SOC, Op_RegI, 28, x28->as_VMReg() ); -+reg_def R28_H ( SOC, SOC, Op_RegI, 28, x28->as_VMReg()->next()); -+reg_def R29 ( SOC, SOC, Op_RegI, 29, x29->as_VMReg() ); -+reg_def R29_H ( SOC, SOC, Op_RegI, 29, x29->as_VMReg()->next()); -+reg_def R30 ( SOC, SOC, Op_RegI, 30, x30->as_VMReg() ); -+reg_def R30_H ( SOC, SOC, Op_RegI, 30, x30->as_VMReg()->next()); -+reg_def R31 ( SOC, SOC, Op_RegI, 31, x31->as_VMReg() ); -+reg_def R31_H ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()->next()); ++ assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register"); + -+// ---------------------------- -+// Float/Double Registers -+// ---------------------------- ++ if (src_hi != OptoReg::Bad) { ++ assert((src_lo & 1) == 0 && src_lo + 1 == src_hi && ++ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi, ++ "expected aligned-adjacent pairs"); ++ } + -+// Double Registers ++ if (src_lo == dst_lo && src_hi == dst_hi) { ++ return 0; // Self copy, no move. ++ } + -+// The rules of ADL require that double registers be defined in pairs. -+// Each pair must be two 32-bit values, but not necessarily a pair of -+// single float registers. In each pair, ADLC-assigned register numbers -+// must be adjacent, with the lower number even. Finally, when the -+// CPU stores such a register pair to memory, the word associated with -+// the lower ADLC-assigned number must be stored to the lower address. ++ bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi && ++ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi; ++ int src_offset = ra_->reg2offset(src_lo); ++ int dst_offset = ra_->reg2offset(dst_lo); + -+// RISCV has 32 floating-point registers. Each can store a single -+// or double precision floating-point value. 
++ if (cbuf != NULL) { ++ MacroAssembler _masm(cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ switch (src_lo_rc) { ++ case rc_int: ++ if (dst_lo_rc == rc_int) { // gpr --> gpr copy ++ if (!is64 && this->ideal_reg() != Op_RegI) { // zero extended for narrow oop or klass ++ __ zero_extend(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]), 32); ++ } else { ++ __ mv(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo])); ++ } ++ } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy ++ if (is64) { ++ __ fmv_d_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_Register(Matcher::_regEncode[src_lo])); ++ } else { ++ __ fmv_w_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_Register(Matcher::_regEncode[src_lo])); ++ } ++ } else { // gpr --> stack spill ++ assert(dst_lo_rc == rc_stack, "spill to bad register class"); ++ __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset); ++ } ++ break; ++ case rc_float: ++ if (dst_lo_rc == rc_int) { // fpr --> gpr copy ++ if (is64) { ++ __ fmv_x_d(as_Register(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } else { ++ __ fmv_x_w(as_Register(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } ++ } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy ++ if (is64) { ++ __ fmv_d(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } else { ++ __ fmv_s(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } ++ } else { // fpr --> stack spill ++ assert(dst_lo_rc == rc_stack, "spill to bad register class"); ++ __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), ++ is64, dst_offset); ++ } ++ break; ++ case rc_stack: ++ if (dst_lo_rc == rc_int) { // stack --> gpr load ++ if (this->ideal_reg() == Op_RegI) { ++ __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); ++ } else { // // zero extended for narrow oop or klass ++ __ unspillu(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); ++ } ++ } else if (dst_lo_rc == rc_float) { // stack --> fpr load ++ __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ is64, src_offset); ++ } else { // stack --> stack copy ++ assert(dst_lo_rc == rc_stack, "spill to bad register class"); ++ if (this->ideal_reg() == Op_RegI) { ++ __ unspill(t0, is64, src_offset); ++ } else { // zero extended for narrow oop or klass ++ __ unspillu(t0, is64, src_offset); ++ } ++ __ spill(t0, is64, dst_offset); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } + -+// for Java use float registers f0-f31 are always save on call whereas -+// the platform ABI treats f8-f9 and f18-f27 as callee save). Other -+// float registers are SOC as per the platform spec ++ if (st != NULL) { ++ st->print("spill "); ++ if (src_lo_rc == rc_stack) { ++ st->print("[sp, #%d] -> ", src_offset); ++ } else { ++ st->print("%s -> ", Matcher::regName[src_lo]); ++ } ++ if (dst_lo_rc == rc_stack) { ++ st->print("[sp, #%d]", dst_offset); ++ } else { ++ st->print("%s", Matcher::regName[dst_lo]); ++ } ++ st->print("\t# spill size = %d", is64 ? 
64 : 32); ++ } + -+reg_def F0 ( SOC, SOC, Op_RegF, 0, f0->as_VMReg() ); -+reg_def F0_H ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()->next() ); -+reg_def F1 ( SOC, SOC, Op_RegF, 1, f1->as_VMReg() ); -+reg_def F1_H ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()->next() ); -+reg_def F2 ( SOC, SOC, Op_RegF, 2, f2->as_VMReg() ); -+reg_def F2_H ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()->next() ); -+reg_def F3 ( SOC, SOC, Op_RegF, 3, f3->as_VMReg() ); -+reg_def F3_H ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()->next() ); -+reg_def F4 ( SOC, SOC, Op_RegF, 4, f4->as_VMReg() ); -+reg_def F4_H ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()->next() ); -+reg_def F5 ( SOC, SOC, Op_RegF, 5, f5->as_VMReg() ); -+reg_def F5_H ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()->next() ); -+reg_def F6 ( SOC, SOC, Op_RegF, 6, f6->as_VMReg() ); -+reg_def F6_H ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()->next() ); -+reg_def F7 ( SOC, SOC, Op_RegF, 7, f7->as_VMReg() ); -+reg_def F7_H ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()->next() ); -+reg_def F8 ( SOC, SOE, Op_RegF, 8, f8->as_VMReg() ); -+reg_def F8_H ( SOC, SOE, Op_RegF, 8, f8->as_VMReg()->next() ); -+reg_def F9 ( SOC, SOE, Op_RegF, 9, f9->as_VMReg() ); -+reg_def F9_H ( SOC, SOE, Op_RegF, 9, f9->as_VMReg()->next() ); -+reg_def F10 ( SOC, SOC, Op_RegF, 10, f10->as_VMReg() ); -+reg_def F10_H ( SOC, SOC, Op_RegF, 10, f10->as_VMReg()->next() ); -+reg_def F11 ( SOC, SOC, Op_RegF, 11, f11->as_VMReg() ); -+reg_def F11_H ( SOC, SOC, Op_RegF, 11, f11->as_VMReg()->next() ); -+reg_def F12 ( SOC, SOC, Op_RegF, 12, f12->as_VMReg() ); -+reg_def F12_H ( SOC, SOC, Op_RegF, 12, f12->as_VMReg()->next() ); -+reg_def F13 ( SOC, SOC, Op_RegF, 13, f13->as_VMReg() ); -+reg_def F13_H ( SOC, SOC, Op_RegF, 13, f13->as_VMReg()->next() ); -+reg_def F14 ( SOC, SOC, Op_RegF, 14, f14->as_VMReg() ); -+reg_def F14_H ( SOC, SOC, Op_RegF, 14, f14->as_VMReg()->next() ); -+reg_def F15 ( SOC, SOC, Op_RegF, 15, f15->as_VMReg() ); -+reg_def F15_H ( SOC, SOC, Op_RegF, 15, f15->as_VMReg()->next() ); -+reg_def F16 ( SOC, SOC, Op_RegF, 16, f16->as_VMReg() ); -+reg_def F16_H ( SOC, SOC, Op_RegF, 16, f16->as_VMReg()->next() ); -+reg_def F17 ( SOC, SOC, Op_RegF, 17, f17->as_VMReg() ); -+reg_def F17_H ( SOC, SOC, Op_RegF, 17, f17->as_VMReg()->next() ); -+reg_def F18 ( SOC, SOE, Op_RegF, 18, f18->as_VMReg() ); -+reg_def F18_H ( SOC, SOE, Op_RegF, 18, f18->as_VMReg()->next() ); -+reg_def F19 ( SOC, SOE, Op_RegF, 19, f19->as_VMReg() ); -+reg_def F19_H ( SOC, SOE, Op_RegF, 19, f19->as_VMReg()->next() ); -+reg_def F20 ( SOC, SOE, Op_RegF, 20, f20->as_VMReg() ); -+reg_def F20_H ( SOC, SOE, Op_RegF, 20, f20->as_VMReg()->next() ); -+reg_def F21 ( SOC, SOE, Op_RegF, 21, f21->as_VMReg() ); -+reg_def F21_H ( SOC, SOE, Op_RegF, 21, f21->as_VMReg()->next() ); -+reg_def F22 ( SOC, SOE, Op_RegF, 22, f22->as_VMReg() ); -+reg_def F22_H ( SOC, SOE, Op_RegF, 22, f22->as_VMReg()->next() ); -+reg_def F23 ( SOC, SOE, Op_RegF, 23, f23->as_VMReg() ); -+reg_def F23_H ( SOC, SOE, Op_RegF, 23, f23->as_VMReg()->next() ); -+reg_def F24 ( SOC, SOE, Op_RegF, 24, f24->as_VMReg() ); -+reg_def F24_H ( SOC, SOE, Op_RegF, 24, f24->as_VMReg()->next() ); -+reg_def F25 ( SOC, SOE, Op_RegF, 25, f25->as_VMReg() ); -+reg_def F25_H ( SOC, SOE, Op_RegF, 25, f25->as_VMReg()->next() ); -+reg_def F26 ( SOC, SOE, Op_RegF, 26, f26->as_VMReg() ); -+reg_def F26_H ( SOC, SOE, Op_RegF, 26, f26->as_VMReg()->next() ); -+reg_def F27 ( SOC, SOE, Op_RegF, 27, f27->as_VMReg() ); -+reg_def F27_H ( SOC, SOE, Op_RegF, 27, f27->as_VMReg()->next() ); -+reg_def F28 ( SOC, SOC, Op_RegF, 28, f28->as_VMReg() ); -+reg_def 
F28_H ( SOC, SOC, Op_RegF, 28, f28->as_VMReg()->next() ); -+reg_def F29 ( SOC, SOC, Op_RegF, 29, f29->as_VMReg() ); -+reg_def F29_H ( SOC, SOC, Op_RegF, 29, f29->as_VMReg()->next() ); -+reg_def F30 ( SOC, SOC, Op_RegF, 30, f30->as_VMReg() ); -+reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() ); -+reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() ); -+reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); ++ return 0; ++} + -+// ---------------------------- -+// Vector Registers -+// ---------------------------- ++#ifndef PRODUCT ++void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ if (ra_ == NULL) { ++ st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx); ++ } else { ++ implementation(NULL, ra_, false, st); ++ } ++} ++#endif + -+// For RVV vector registers, we simply extend vector register size to 4 -+// 'logical' slots. This is nominally 128 bits but it actually covers -+// all possible 'physical' RVV vector register lengths from 128 ~ 1024 -+// bits. The 'physical' RVV vector register length is detected during -+// startup, so the register allocator is able to identify the correct -+// number of bytes needed for an RVV spill/unspill. -+ -+reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() ); -+reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() ); -+reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) ); -+reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) ); -+ -+reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() ); -+reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() ); -+reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) ); -+reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) ); -+ -+reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() ); -+reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() ); -+reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) ); -+reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) ); -+ -+reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() ); -+reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() ); -+reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) ); -+reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) ); -+ -+reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() ); -+reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() ); -+reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) ); -+reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) ); -+ -+reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() ); -+reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() ); -+reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) ); -+reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) ); -+ -+reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() ); -+reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() ); -+reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) ); -+reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) ); -+ -+reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() ); -+reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() ); -+reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) ); -+reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) ); -+ -+reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() ); -+reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() ); -+reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) ); -+reg_def V8_K ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(3) ); -+ -+reg_def V9 ( SOC, SOC, Op_VecA, 9, 
v9->as_VMReg() ); -+reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() ); -+reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) ); -+reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) ); -+ -+reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() ); -+reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() ); -+reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) ); -+reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) ); -+ -+reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() ); -+reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() ); -+reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) ); -+reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) ); -+ -+reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() ); -+reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() ); -+reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) ); -+reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) ); -+ -+reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() ); -+reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() ); -+reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) ); -+reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) ); -+ -+reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() ); -+reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() ); -+reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) ); -+reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) ); -+ -+reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() ); -+reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() ); -+reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) ); -+reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) ); -+ -+reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() ); -+reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() ); -+reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) ); -+reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) ); -+ -+reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() ); -+reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() ); -+reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) ); -+reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) ); -+ -+reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() ); -+reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() ); -+reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) ); -+reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) ); -+ -+reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() ); -+reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() ); -+reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) ); -+reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) ); -+ -+reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() ); -+reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() ); -+reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) ); -+reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) ); -+ -+reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() ); -+reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() ); -+reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) ); -+reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) ); -+ -+reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() ); -+reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() ); 
-+reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) ); -+reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) ); -+ -+reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() ); -+reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() ); -+reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) ); -+reg_def V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) ); -+ -+reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() ); -+reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() ); -+reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) ); -+reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) ); -+ -+reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() ); -+reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() ); -+reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) ); -+reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) ); -+ -+reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() ); -+reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() ); -+reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) ); -+reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) ); -+ -+reg_def V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() ); -+reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() ); -+reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) ); -+reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) ); -+ -+reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() ); -+reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() ); -+reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) ); -+reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) ); -+ -+reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() ); -+reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() ); -+reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) ); -+reg_def V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) ); -+ -+reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() ); -+reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() ); -+reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) ); -+reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) ); -+ -+reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() ); -+reg_def V31_H ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next() ); -+reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) ); -+reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) ); ++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation(&cbuf, ra_, false, NULL); ++} + -+// ---------------------------- -+// Special Registers -+// ---------------------------- ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} + -+// On riscv, the physical flag register is missing, so we use t1 instead, -+// to bridge the RegFlag semantics in share/opto ++//============================================================================= + -+reg_def RFLAGS (SOC, SOC, Op_RegFlags, 6, x6->as_VMReg() ); ++#ifndef PRODUCT ++void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(ra_ != NULL && st != NULL); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("add %s, sp, #%d\t# box lock", ++ Matcher::regName[reg], offset); ++} ++#endif + -+// Specify priority of register selection within phases of register -+// allocation. 
Highest priority is first. A useful heuristic is to -+// give registers a low priority when they are required by machine -+// instructions, like EAX and EDX on I486, and choose no-save registers -+// before save-on-call, & save-on-call before save-on-entry. Registers -+// which participate in fixed calling sequences should come last. -+// Registers which are used as pairs must fall on an even boundary. ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); + -+alloc_class chunk0( -+ // volatiles -+ R7, R7_H, -+ R28, R28_H, -+ R29, R29_H, -+ R30, R30_H, -+ R31, R31_H, ++ assert_cond(ra_ != NULL); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); + -+ // arg registers -+ R10, R10_H, -+ R11, R11_H, -+ R12, R12_H, -+ R13, R13_H, -+ R14, R14_H, -+ R15, R15_H, -+ R16, R16_H, -+ R17, R17_H, ++ if (is_imm_in_range(offset, 12, 0)) { ++ __ addi(as_Register(reg), sp, offset); ++ } else if (is_imm_in_range(offset, 32, 0)) { ++ __ li32(t0, offset); ++ __ add(as_Register(reg), sp, t0); ++ } else { ++ ShouldNotReachHere(); ++ } ++} + -+ // non-volatiles -+ R9, R9_H, -+ R18, R18_H, -+ R19, R19_H, -+ R20, R20_H, -+ R21, R21_H, -+ R22, R22_H, -+ R24, R24_H, -+ R25, R25_H, -+ R26, R26_H, ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_). ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + -+ // non-allocatable registers -+ R23, R23_H, // java thread -+ R27, R27_H, // heapbase -+ R4, R4_H, // thread -+ R8, R8_H, // fp -+ R0, R0_H, // zero -+ R1, R1_H, // ra -+ R2, R2_H, // sp -+ R3, R3_H, // gp -+); ++ if (is_imm_in_range(offset, 12, 0)) { ++ return NativeInstruction::instruction_size; ++ } else { ++ return 3 * NativeInstruction::instruction_size; // lui + addiw + add; ++ } ++} + -+alloc_class chunk1( ++//============================================================================= + -+ // no save -+ F0, F0_H, -+ F1, F1_H, -+ F2, F2_H, -+ F3, F3_H, -+ F4, F4_H, -+ F5, F5_H, -+ F6, F6_H, -+ F7, F7_H, -+ F28, F28_H, -+ F29, F29_H, -+ F30, F30_H, -+ F31, F31_H, ++#ifndef PRODUCT ++void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const ++{ ++ assert_cond(st != NULL); ++ st->print_cr("# MachUEPNode"); ++ if (UseCompressedClassPointers) { ++ st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); ++ if (Universe::narrow_klass_shift() != 0) { ++ st->print_cr("\tdecode_klass_not_null t0, t0"); ++ } ++ } else { ++ st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); ++ } ++ st->print_cr("\tbeq t0, t1, ic_hit"); ++ st->print_cr("\tj, SharedRuntime::_ic_miss_stub\t # Inline cache check"); ++ st->print_cr("\tic_hit:"); ++} ++#endif + -+ // arg registers -+ F10, F10_H, -+ F11, F11_H, -+ F12, F12_H, -+ F13, F13_H, -+ F14, F14_H, -+ F15, F15_H, -+ F16, F16_H, -+ F17, F17_H, ++void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const ++{ ++ // This is the unverified entry point. 
++ MacroAssembler _masm(&cbuf); + -+ // non-volatiles -+ F8, F8_H, -+ F9, F9_H, -+ F18, F18_H, -+ F19, F19_H, -+ F20, F20_H, -+ F21, F21_H, -+ F22, F22_H, -+ F23, F23_H, -+ F24, F24_H, -+ F25, F25_H, -+ F26, F26_H, -+ F27, F27_H, -+); ++ Label skip; ++ __ cmp_klass(j_rarg0, t1, t0, skip); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ __ bind(skip); ++} + -+alloc_class chunk2( -+ V0, V0_H, V0_J, V0_K, -+ V1, V1_H, V1_J, V1_K, -+ V2, V2_H, V2_J, V2_K, -+ V3, V3_H, V3_J, V3_K, -+ V4, V4_H, V4_J, V4_K, -+ V5, V5_H, V5_J, V5_K, -+ V6, V6_H, V6_J, V6_K, -+ V7, V7_H, V7_J, V7_K, -+ V8, V8_H, V8_J, V8_K, -+ V9, V9_H, V9_J, V9_K, -+ V10, V10_H, V10_J, V10_K, -+ V11, V11_H, V11_J, V11_K, -+ V12, V12_H, V12_J, V12_K, -+ V13, V13_H, V13_J, V13_K, -+ V14, V14_H, V14_J, V14_K, -+ V15, V15_H, V15_J, V15_K, -+ V16, V16_H, V16_J, V16_K, -+ V17, V17_H, V17_J, V17_K, -+ V18, V18_H, V18_J, V18_K, -+ V19, V19_H, V19_J, V19_K, -+ V20, V20_H, V20_J, V20_K, -+ V21, V21_H, V21_J, V21_K, -+ V22, V22_H, V22_J, V22_K, -+ V23, V23_H, V23_J, V23_K, -+ V24, V24_H, V24_J, V24_K, -+ V25, V25_H, V25_J, V25_K, -+ V26, V26_H, V26_J, V26_K, -+ V27, V27_H, V27_J, V27_K, -+ V28, V28_H, V28_J, V28_K, -+ V29, V29_H, V29_J, V29_K, -+ V30, V30_H, V30_J, V30_K, -+ V31, V31_H, V31_J, V31_K, -+); ++uint MachUEPNode::size(PhaseRegAlloc* ra_) const ++{ ++ assert_cond(ra_ != NULL); ++ return MachNode::size(ra_); ++} + -+alloc_class chunk3(RFLAGS); ++// REQUIRED EMIT CODE + -+//----------Architecture Description Register Classes-------------------------- -+// Several register classes are automatically defined based upon information in -+// this architecture description. -+// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) -+// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) -+// ++//============================================================================= + -+// Class for all 32 bit general purpose registers -+reg_class all_reg32( -+ R0, -+ R1, -+ R2, -+ R3, -+ R4, -+ R7, -+ R8, -+ R9, -+ R10, -+ R11, -+ R12, -+ R13, -+ R14, -+ R15, -+ R16, -+ R17, -+ R18, -+ R19, -+ R20, -+ R21, -+ R22, -+ R23, -+ R24, -+ R25, -+ R26, -+ R27, -+ R28, -+ R29, -+ R30, -+ R31 -+); ++// Emit exception handler code. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) ++{ ++ // la_patchable t0, #exception_blob_entry_point ++ // jr (offset)t0 ++ // or ++ // j #exception_blob_entry_point ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ int offset = __ offset(); ++ __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} + -+// Class for any 32 bit integer registers (excluding zr) -+reg_class any_reg32 %{ -+ return _ANY_REG32_mask; -+%} ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) ++{ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. 
++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ int offset = __ offset(); + -+// Singleton class for R10 int register -+reg_class int_r10_reg(R10); ++ __ auipc(ra, 0); ++ __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); + -+// Singleton class for R12 int register -+reg_class int_r12_reg(R12); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; + -+// Singleton class for R13 int register -+reg_class int_r13_reg(R13); ++} ++// REQUIRED MATCHER CODE + -+// Singleton class for R14 int register -+reg_class int_r14_reg(R14); ++//============================================================================= + -+// Class for all long integer registers -+reg_class all_reg( -+ R0, R0_H, -+ R1, R1_H, -+ R2, R2_H, -+ R3, R3_H, -+ R4, R4_H, -+ R7, R7_H, -+ R8, R8_H, -+ R9, R9_H, -+ R10, R10_H, -+ R11, R11_H, -+ R12, R12_H, -+ R13, R13_H, -+ R14, R14_H, -+ R15, R15_H, -+ R16, R16_H, -+ R17, R17_H, -+ R18, R18_H, -+ R19, R19_H, -+ R20, R20_H, -+ R21, R21_H, -+ R22, R22_H, -+ R23, R23_H, -+ R24, R24_H, -+ R25, R25_H, -+ R26, R26_H, -+ R27, R27_H, -+ R28, R28_H, -+ R29, R29_H, -+ R30, R30_H, -+ R31, R31_H -+); ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) { ++ return false; ++ } + -+// Class for all long integer registers (excluding zr) -+reg_class any_reg %{ -+ return _ANY_REG_mask; -+%} ++ switch (opcode) { ++ case Op_PopCountI: ++ case Op_PopCountL: ++ return UsePopCountInstruction; + -+// Class for non-allocatable 32 bit registers -+reg_class non_allocatable_reg32( -+ R0, // zr -+ R1, // ra -+ R2, // sp -+ R3, // gp -+ R4, // tp -+ R23 // java thread -+); ++ case Op_CountLeadingZerosI: ++ case Op_CountLeadingZerosL: ++ case Op_CountTrailingZerosI: ++ case Op_CountTrailingZerosL: ++ return UseZbb; ++ } + -+// Class for non-allocatable 64 bit registers -+reg_class non_allocatable_reg( -+ R0, R0_H, // zr -+ R1, R1_H, // ra -+ R2, R2_H, // sp -+ R3, R3_H, // gp -+ R4, R4_H, // tp -+ R23, R23_H // java thread -+); ++ return true; // Per default match rules are supported. ++} + -+reg_class no_special_reg32 %{ -+ return _NO_SPECIAL_REG32_mask; -+%} ++// Identify extra cases that we might want to provide match rules for vector nodes and ++// other intrinsics guarded with vector length (vlen). ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++ return false; ++} + -+reg_class no_special_reg %{ -+ return _NO_SPECIAL_REG_mask; -+%} ++const bool Matcher::has_predicated_vectors(void) { ++ return false; ++} + -+reg_class ptr_reg %{ -+ return _PTR_REG_mask; -+%} ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ return default_pressure_threshold; ++} + -+reg_class no_special_ptr_reg %{ -+ return _NO_SPECIAL_PTR_REG_mask; -+%} ++int Matcher::regnum_to_fpu_offset(int regnum) ++{ ++ Unimplemented(); ++ return 0; ++} + -+// Class for 64 bit register r10 -+reg_class r10_reg( -+ R10, R10_H -+); ++// Is this branch offset short enough that a short branch can be used? ++// ++// NOTE: If the platform does not provide any short branch variants, then ++// this method should return false for offset 0. 
++// |---label(L1)-----| ++// |-----------------| ++// |-----------------|----------eq: float------------------- ++// |-----------------| // far_cmpD_branch | cmpD_branch ++// |------- ---------| feq; | feq; ++// |-far_cmpD_branch-| beqz done; | bnez L; ++// |-----------------| j L; | ++// |-----------------| bind(done); | ++// |-----------------|-------------------------------------- ++// |-----------------| // so shortBrSize = br_size - 4; ++// |-----------------| // so offs = offset - shortBrSize + 4; ++// |---label(L2)-----| ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ // The passed offset is relative to address of the branch. ++ int shortBrSize = br_size - 4; ++ int offs = offset - shortBrSize + 4; ++ return (-4096 <= offs && offs < 4096); ++} + -+// Class for 64 bit register r11 -+reg_class r11_reg( -+ R11, R11_H -+); ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ // Probably always true, even if a temp register is required. ++ return true; ++} + -+// Class for 64 bit register r12 -+reg_class r12_reg( -+ R12, R12_H -+); ++// true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} + -+// Class for 64 bit register r13 -+reg_class r13_reg( -+ R13, R13_H -+); ++// Vector width in bytes. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ return 0; ++} + -+// Class for 64 bit register r14 -+reg_class r14_reg( -+ R14, R14_H -+); ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ return vector_width_in_bytes(bt) / type2aelembytes(bt); ++} ++const int Matcher::min_vector_size(const BasicType bt) { ++ return max_vector_size(bt); ++} + -+// Class for 64 bit register r15 -+reg_class r15_reg( -+ R15, R15_H -+); ++// Vector ideal reg. ++const uint Matcher::vector_ideal_reg(int len) { ++ ShouldNotReachHere(); ++ return 0; ++} + -+// Class for 64 bit register r16 -+reg_class r16_reg( -+ R16, R16_H -+); ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ fatal("vector shift is not supported"); ++ return Node::NotAMachineReg; ++} + -+// Class for method register -+reg_class method_reg( -+ R31, R31_H -+); ++// AES support not yet implemented ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} + -+// Class for heapbase register -+reg_class heapbase_reg( -+ R27, R27_H -+); ++// RISC-V supports misaligned vectors store/load. ++const bool Matcher::misaligned_vectors_ok() { ++ return true; ++} + -+// Class for java thread register -+reg_class java_thread_reg( -+ R23, R23_H -+); ++// false => size gets scaled to BytesPerLong, ok. ++const bool Matcher::init_array_count_is_in_bytes = false; + -+reg_class r28_reg( -+ R28, R28_H -+); ++// Use conditional move (CMOVL) ++const int Matcher::long_cmove_cost() { ++ // long cmoves are no more expensive than int cmoves ++ return 0; ++} + -+reg_class r29_reg( -+ R29, R29_H -+); ++const int Matcher::float_cmove_cost() { ++ // float cmoves are no more expensive than int cmoves ++ return 0; ++} + -+reg_class r30_reg( -+ R30, R30_H -+); ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; + -+// Class for zero registesr -+reg_class zr_reg( -+ R0, R0_H -+); ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? 
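Editor's note, not part of the patch text: Matcher::is_short_branch_offset above re-biases the label offset before the range check, following the diagram in its comment (shortBrSize = br_size - 4, then offs = offset - shortBrSize + 4), and then tests the roughly +-4 KiB reach of a RISC-V conditional branch. A minimal worked example of that arithmetic, assuming the three 4-byte instructions (feq; beqz; j) shown for the far float-compare sequence, i.e. br_size = 12:

    #include <cstdio>

    // Mirrors the offset adjustment in Matcher::is_short_branch_offset above.
    // 'offset' is relative to the address of the (far) branch sequence.
    bool fits_short_branch(int br_size, int offset) {
      int shortBrSize = br_size - 4;
      int offs = offset - shortBrSize + 4;
      return -4096 <= offs && offs < 4096;   // conditional-branch reach
    }

    int main() {
      // Label assumed to be 2000 bytes ahead of the branch:
      // offs = 2000 - 8 + 4 = 1996, well inside the +-4 KiB window.
      std::printf("short form usable: %d\n", fits_short_branch(12, 2000));
      return 0;
    }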
++const bool Matcher::need_masked_shift_count = false; + -+// Class for thread register -+reg_class thread_reg( -+ R4, R4_H -+); ++// This affects two different things: ++// - how Decode nodes are matched ++// - how ImplicitNullCheck opportunities are recognized ++// If true, the matcher will try to remove all Decodes and match them ++// (as operands) into nodes. NullChecks are not prepared to deal with ++// Decodes by final_graph_reshaping(). ++// If false, final_graph_reshaping() forces the decode behind the Cmp ++// for a NullCheck. The matcher matches the Decode node into a register. ++// Implicit_null_check optimization moves the Decode along with the ++// memory operation back up before the NullCheck. ++bool Matcher::narrow_oop_use_complex_address() { ++ return Universe::narrow_oop_shift() == 0; ++} + -+// Class for frame pointer register -+reg_class fp_reg( -+ R8, R8_H -+); ++bool Matcher::narrow_klass_use_complex_address() { ++// TODO ++// decide whether we need to set this to true ++ return false; ++} + -+// Class for link register -+reg_class ra_reg( -+ R1, R1_H -+); ++bool Matcher::const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. ++ return Universe::narrow_oop_base() == NULL; ++} + -+// Class for long sp register -+reg_class sp_reg( -+ R2, R2_H -+); ++bool Matcher::const_klass_prefer_decode() { ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ return Universe::narrow_klass_base() == NULL; ++} + -+// Class for all float registers -+reg_class float_reg( -+ F0, -+ F1, -+ F2, -+ F3, -+ F4, -+ F5, -+ F6, -+ F7, -+ F8, -+ F9, -+ F10, -+ F11, -+ F12, -+ F13, -+ F14, -+ F15, -+ F16, -+ F17, -+ F18, -+ F19, -+ F20, -+ F21, -+ F22, -+ F23, -+ F24, -+ F25, -+ F26, -+ F27, -+ F28, -+ F29, -+ F30, -+ F31 -+); ++// Is it better to copy float constants, or load them directly from ++// memory? Intel can load a float constant from a direct address, ++// requiring no extra registers. Most RISCs will have to materialize ++// an address into a register first, so they would do better to copy ++// the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; + -+// Double precision float registers have virtual `high halves' that -+// are needed by the allocator. -+// Class for all double registers -+reg_class double_reg( -+ F0, F0_H, -+ F1, F1_H, -+ F2, F2_H, -+ F3, F3_H, -+ F4, F4_H, -+ F5, F5_H, -+ F6, F6_H, -+ F7, F7_H, -+ F8, F8_H, -+ F9, F9_H, -+ F10, F10_H, -+ F11, F11_H, -+ F12, F12_H, -+ F13, F13_H, -+ F14, F14_H, -+ F15, F15_H, -+ F16, F16_H, -+ F17, F17_H, -+ F18, F18_H, -+ F19, F19_H, -+ F20, F20_H, -+ F21, F21_H, -+ F22, F22_H, -+ F23, F23_H, -+ F24, F24_H, -+ F25, F25_H, -+ F26, F26_H, -+ F27, F27_H, -+ F28, F28_H, -+ F29, F29_H, -+ F30, F30_H, -+ F31, F31_H -+); ++// If CPU can load and store mis-aligned doubles directly then no ++// fixup is needed. Else we split the double into 2 integer pieces ++// and move it piece-by-piece. Only happens when passing doubles into ++// C code as the Java calling convention forces doubles to be aligned. 
++const bool Matcher::misaligned_doubles_ok = true; + -+// Class for all RVV vector registers -+reg_class vectora_reg( -+ V1, V1_H, V1_J, V1_K, -+ V2, V2_H, V2_J, V2_K, -+ V3, V3_H, V3_J, V3_K, -+ V4, V4_H, V4_J, V4_K, -+ V5, V5_H, V5_J, V5_K, -+ V6, V6_H, V6_J, V6_K, -+ V7, V7_H, V7_J, V7_K, -+ V8, V8_H, V8_J, V8_K, -+ V9, V9_H, V9_J, V9_K, -+ V10, V10_H, V10_J, V10_K, -+ V11, V11_H, V11_J, V11_K, -+ V12, V12_H, V12_J, V12_K, -+ V13, V13_H, V13_J, V13_K, -+ V14, V14_H, V14_J, V14_K, -+ V15, V15_H, V15_J, V15_K, -+ V16, V16_H, V16_J, V16_K, -+ V17, V17_H, V17_J, V17_K, -+ V18, V18_H, V18_J, V18_K, -+ V19, V19_H, V19_J, V19_K, -+ V20, V20_H, V20_J, V20_K, -+ V21, V21_H, V21_J, V21_K, -+ V22, V22_H, V22_J, V22_K, -+ V23, V23_H, V23_J, V23_K, -+ V24, V24_H, V24_J, V24_K, -+ V25, V25_H, V25_J, V25_K, -+ V26, V26_H, V26_J, V26_K, -+ V27, V27_H, V27_J, V27_K, -+ V28, V28_H, V28_J, V28_K, -+ V29, V29_H, V29_J, V29_K, -+ V30, V30_H, V30_J, V30_K, -+ V31, V31_H, V31_J, V31_K -+); ++// No-op on amd64 ++void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { ++ Unimplemented(); ++} + -+// Class for 64 bit register f0 -+reg_class f0_reg( -+ F0, F0_H -+); ++// Advertise here if the CPU requires explicit rounding operations to ++// implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; + -+// Class for 64 bit register f1 -+reg_class f1_reg( -+ F1, F1_H -+); ++// Are floats converted to double when stored to stack during ++// deoptimization? ++bool Matcher::float_in_double() { return false; } + -+// Class for 64 bit register f2 -+reg_class f2_reg( -+ F2, F2_H -+); ++// Do ints take an entire long register or just half? ++// The relevant question is how the int is callee-saved: ++// the whole long is written but de-opt'ing will have to extract ++// the relevant 32 bits. ++const bool Matcher::int_in_long = true; + -+// Class for 64 bit register f3 -+reg_class f3_reg( -+ F3, F3_H -+); ++// Return whether or not this register is ever used as an argument. ++// This function is used on startup to build the trampoline stubs in ++// generateOptoStub. Registers not mentioned will be killed by the VM ++// call in the trampoline, and arguments in those registers not be ++// available to the callee. 
++bool Matcher::can_be_java_arg(int reg) ++{ ++ return ++ reg == R10_num || reg == R10_H_num || ++ reg == R11_num || reg == R11_H_num || ++ reg == R12_num || reg == R12_H_num || ++ reg == R13_num || reg == R13_H_num || ++ reg == R14_num || reg == R14_H_num || ++ reg == R15_num || reg == R15_H_num || ++ reg == R16_num || reg == R16_H_num || ++ reg == R17_num || reg == R17_H_num || ++ reg == F10_num || reg == F10_H_num || ++ reg == F11_num || reg == F11_H_num || ++ reg == F12_num || reg == F12_H_num || ++ reg == F13_num || reg == F13_H_num || ++ reg == F14_num || reg == F14_H_num || ++ reg == F15_num || reg == F15_H_num || ++ reg == F16_num || reg == F16_H_num || ++ reg == F17_num || reg == F17_H_num; ++} + -+// class for vector register v1 -+reg_class v1_reg( -+ V1, V1_H, V1_J, V1_K -+); ++bool Matcher::is_spillable_arg(int reg) ++{ ++ return can_be_java_arg(reg); ++} + -+// class for vector register v2 -+reg_class v2_reg( -+ V2, V2_H, V2_J, V2_K -+); ++bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { ++ return false; ++} + -+// class for vector register v3 -+reg_class v3_reg( -+ V3, V3_H, V3_J, V3_K -+); ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} + -+// class for vector register v4 -+reg_class v4_reg( -+ V4, V4_H, V4_J, V4_K -+); ++// Register for MODI projection of divmodI. ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} + -+// class for vector register v5 -+reg_class v5_reg( -+ V5, V5_H, V5_J, V5_K -+); ++// Register for DIVL projection of divmodL. ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} + -+// class for condition codes -+reg_class reg_flags(RFLAGS); -+%} ++// Register for MODL projection of divmodL. ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} + -+//----------DEFINITION BLOCK--------------------------------------------------- -+// Define name --> value mappings to inform the ADLC of an integer valued name -+// Current support includes integer values in the range [0, 0x7FFFFFFF] -+// Format: -+// int_def ( , ); -+// Generated Code in ad_.hpp -+// #define () -+// // value == -+// Generated code in ad_.cpp adlc_verification() -+// assert( == , "Expect () to equal "); -+// ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} + -+// we follow the ppc-aix port in using a simple cost model which ranks -+// register operations as cheap, memory ops as more expensive and -+// branches as most expensive. the first two have a low as well as a -+// normal cost. huge cost appears to be a way of saying don't do -+// something ++bool size_fits_all_mem_uses(AddPNode* addp, int shift) { ++ assert_cond(addp != NULL); ++ for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) { ++ Node* u = addp->fast_out(i); ++ if (u != NULL && u->is_Mem()) { ++ int opsize = u->as_Mem()->memory_size(); ++ assert(opsize > 0, "unexpected memory operand size"); ++ if (u->as_Mem()->memory_size() != (1 << shift)) { ++ return false; ++ } ++ } ++ } ++ return true; ++} + -+definitions %{ -+ // The default cost (of a register move instruction). 
-+ int_def DEFAULT_COST ( 100, 100); -+ int_def ALU_COST ( 100, 1 * DEFAULT_COST); // unknown, const, arith, shift, slt, -+ // multi, auipc, nop, logical, move -+ int_def LOAD_COST ( 300, 3 * DEFAULT_COST); // load, fpload -+ int_def STORE_COST ( 100, 1 * DEFAULT_COST); // store, fpstore -+ int_def XFER_COST ( 300, 3 * DEFAULT_COST); // mfc, mtc, fcvt, fmove, fcmp -+ int_def BRANCH_COST ( 100, 1 * DEFAULT_COST); // branch, jmp, call -+ int_def IMUL_COST ( 1000, 10 * DEFAULT_COST); // imul -+ int_def IDIVSI_COST ( 3400, 34 * DEFAULT_COST); // idivdi -+ int_def IDIVDI_COST ( 6600, 66 * DEFAULT_COST); // idivsi -+ int_def FMUL_SINGLE_COST ( 500, 5 * DEFAULT_COST); // fadd, fmul, fmadd -+ int_def FMUL_DOUBLE_COST ( 700, 7 * DEFAULT_COST); // fadd, fmul, fmadd -+ int_def FDIV_COST ( 2000, 20 * DEFAULT_COST); // fdiv -+ int_def FSQRT_COST ( 2500, 25 * DEFAULT_COST); // fsqrt -+ int_def VOLATILE_REF_COST ( 1000, 10 * DEFAULT_COST); -+%} ++const bool Matcher::convi2l_type_required = false; + ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? ++bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} + ++void Compile::reshape_address(AddPNode* addp) { ++} + -+//----------SOURCE BLOCK------------------------------------------------------- -+// This is a block of C++ code which provides values, functions, and -+// definitions necessary in the rest of the architecture description ++%} + -+source_hpp %{ + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/cardTable.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" -+#include "gc/shared/collectedHeap.hpp" -+#include "opto/addnode.hpp" -+#include "opto/convertnode.hpp" + -+extern RegMask _ANY_REG32_mask; -+extern RegMask _ANY_REG_mask; -+extern RegMask _PTR_REG_mask; -+extern RegMask _NO_SPECIAL_REG32_mask; -+extern RegMask _NO_SPECIAL_REG_mask; -+extern RegMask _NO_SPECIAL_PTR_REG_mask; ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to ++// output byte streams. Encoding classes are parameterized macros ++// used by Machine Instruction Nodes in order to generate the bit ++// encoding of the instruction. Operands specify their base encoding ++// interface with the interface keyword. There are currently ++// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & ++// COND_INTER. REG_INTER causes an operand to generate a function ++// which returns its register number when queried. CONST_INTER causes ++// an operand to generate a function which returns the value of the ++// constant when queried. MEMORY_INTER causes an operand to generate ++// four functions which return the Base Register, the Index Register, ++// the Scale Value, and the Offset Value of the operand when queried. ++// COND_INTER causes an operand to generate six functions which return ++// the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional ++// instruction. ++// ++// Instructions specify two basic values for encoding. Again, a ++// function is available to check if the constant displacement is an ++// oop. 
They use the ins_encode keyword to specify their encoding ++// classes (which must be a sequence of enc_class names, and their ++// parameters, specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular ++// instruction needs for encoding need to be specified. ++encode %{ ++ // BEGIN Non-volatile memory access + -+class CallStubImpl { ++ enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ int64_t con = (int64_t)$src$$constant; ++ Register dst_reg = as_Register($dst$$reg); ++ __ li(dst_reg, con); ++ %} + -+ //-------------------------------------------------------------- -+ //---< Used for optimization in Compile::shorten_branches >--- -+ //-------------------------------------------------------------- ++ enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ address con = (address)$src$$constant; ++ if (con == NULL || con == (address)1) { ++ ShouldNotReachHere(); ++ } else { ++ relocInfo::relocType rtype = $src->constant_reloc(); ++ if (rtype == relocInfo::oop_type) { ++ __ movoop(dst_reg, (jobject)con, /*immediate*/true); ++ } else if (rtype == relocInfo::metadata_type) { ++ __ mov_metadata(dst_reg, (Metadata*)con); ++ } else { ++ assert(rtype == relocInfo::none, "unexpected reloc type"); ++ __ li(dst_reg, $src$$constant); ++ } ++ } ++ %} + -+ public: -+ // Size of call trampoline stub. -+ static uint size_call_trampoline() { -+ return 0; // no call trampolines on this platform -+ } ++ enc_class riscv_enc_mov_p1(iRegP dst) %{ ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ Register dst_reg = as_Register($dst$$reg); ++ __ li(dst_reg, 1); ++ %} + -+ // number of relocations needed by a call trampoline stub -+ static uint reloc_call_trampoline() { -+ return 0; // no call trampolines on this platform -+ } -+}; ++ enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{ ++ MacroAssembler _masm(&cbuf); ++ int32_t offset = 0; ++ address page = (address)$src$$constant; ++ unsigned long align = (unsigned long)page & 0xfff; ++ assert(align == 0, "polling page must be page aligned"); ++ Register dst_reg = as_Register($dst$$reg); ++ __ la_patchable(dst_reg, Address(page, relocInfo::poll_type), offset); ++ __ addi(dst_reg, dst_reg, offset); ++ %} + -+class HandlerImpl { ++ enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ ++ MacroAssembler _masm(&cbuf); ++ __ load_byte_map_base($dst$$Register); ++ %} + -+ public: ++ enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ address con = (address)$src$$constant; ++ if (con == NULL) { ++ ShouldNotReachHere(); ++ } else { ++ relocInfo::relocType rtype = $src->constant_reloc(); ++ assert(rtype == relocInfo::oop_type, "unexpected reloc type"); ++ __ set_narrow_oop(dst_reg, (jobject)con); ++ } ++ %} + -+ static int emit_exception_handler(CodeBuffer &cbuf); -+ static int emit_deopt_handler(CodeBuffer& cbuf); ++ enc_class riscv_enc_mov_zero(iRegNorP dst) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ __ mv(dst_reg, zr); ++ %} + -+ static uint size_exception_handler() { -+ return MacroAssembler::far_branch_size(); -+ } ++ enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ ++ MacroAssembler _masm(&cbuf); ++ Register 
dst_reg = as_Register($dst$$reg); ++ address con = (address)$src$$constant; ++ if (con == NULL) { ++ ShouldNotReachHere(); ++ } else { ++ relocInfo::relocType rtype = $src->constant_reloc(); ++ assert(rtype == relocInfo::metadata_type, "unexpected reloc type"); ++ __ set_narrow_klass(dst_reg, (Klass *)con); ++ } ++ %} + -+ static uint size_deopt_handler() { -+ // count auipc + far branch -+ return NativeInstruction::instruction_size + MacroAssembler::far_branch_size(); -+ } -+}; ++ enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ ++ MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + -+class Node::PD { -+public: -+ enum NodeFlags { -+ _last_flag = Node::_last_flag -+ }; -+}; ++ enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ ++ MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + -+bool is_CAS(int opcode, bool maybe_volatile); ++ enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{ ++ MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + -+// predicate controlling translation of CompareAndSwapX -+bool needs_acquiring_load_reserved(const Node *load); ++ enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ ++ MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + -+// predicate controlling addressing modes -+bool size_fits_all_mem_uses(AddPNode* addp, int shift); -+%} ++ enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ ++ MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + -+source %{ ++ enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{ ++ MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + -+// Derived RegMask with conditionally allocatable registers ++ // compare and branch instruction encodings + -+RegMask _ANY_REG32_mask; -+RegMask _ANY_REG_mask; -+RegMask _PTR_REG_mask; -+RegMask _NO_SPECIAL_REG32_mask; -+RegMask _NO_SPECIAL_REG_mask; -+RegMask _NO_SPECIAL_PTR_REG_mask; ++ enc_class riscv_enc_j(label lbl) %{ ++ MacroAssembler _masm(&cbuf); ++ Label* L = $lbl$$label; ++ __ j(*L); ++ %} + -+void reg_mask_init() { ++ enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ ++ MacroAssembler _masm(&cbuf); ++ Label* L = $lbl$$label; ++ switch ($cmp$$cmpcode) { ++ case(BoolTest::ge): ++ __ j(*L); ++ break; ++ case(BoolTest::lt): ++ break; ++ default: ++ 
Unimplemented(); ++ } ++ %} + -+ _ANY_REG32_mask = _ALL_REG32_mask; -+ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg())); ++ // call instruction encodings + -+ _ANY_REG_mask = _ALL_REG_mask; -+ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask); ++ enc_class riscv_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) %{ ++ Register sub_reg = as_Register($sub$$reg); ++ Register super_reg = as_Register($super$$reg); ++ Register temp_reg = as_Register($temp$$reg); ++ Register result_reg = as_Register($result$$reg); ++ Register cr_reg = t1; + -+ _PTR_REG_mask = _ALL_REG_mask; -+ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask); ++ Label miss; ++ Label done; ++ MacroAssembler _masm(&cbuf); ++ __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, ++ NULL, &miss); ++ if ($primary) { ++ __ mv(result_reg, zr); ++ } else { ++ __ mv(cr_reg, zr); ++ __ j(done); ++ } + -+ _NO_SPECIAL_REG32_mask = _ALL_REG32_mask; -+ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask); ++ __ bind(miss); ++ if (!$primary) { ++ __ li(cr_reg, 1); ++ } + -+ _NO_SPECIAL_REG_mask = _ALL_REG_mask; -+ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); ++ __ bind(done); ++ %} + -+ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask; -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); ++ enc_class riscv_enc_java_static_call(method meth) %{ ++ MacroAssembler _masm(&cbuf); + -+ // x27 is not allocatable when compressed oops is on -+ if (UseCompressedOops) { -+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg())); -+ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); -+ } ++ address addr = (address)$meth$$method; ++ address call = NULL; ++ assert_cond(addr != NULL); ++ if (!_method) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(Address(addr, rspec), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } + -+ // x8 is not allocatable when PreserveFramePointer is on -+ if (PreserveFramePointer) { -+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg())); -+ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask); -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask); -+ } -+} ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ %} + -+void PhaseOutput::pd_perform_mach_node_analysis() { -+} ++ enc_class riscv_enc_java_dynamic_call(method meth) %{ ++ MacroAssembler _masm(&cbuf); ++ int method_index = resolved_method_index(cbuf); ++ address call = __ ic_call((address)$meth$$method, method_index); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} + -+int MachNode::pd_alignment_required() const { -+ return 1; -+} ++ enc_class riscv_enc_call_epilog() %{ ++ MacroAssembler _masm(&cbuf); ++ if (VerifyStackAtCalls) { ++ // Check that stack depth is unchanged: find majik cookie on stack ++ __ call_Unimplemented(); ++ } ++ %} + -+int MachNode::compute_padding(int current_offset) const { -+ return 0; -+} ++ enc_class riscv_enc_java_to_runtime(method meth) %{ ++ MacroAssembler _masm(&cbuf); + -+// is_CAS(int opcode, bool maybe_volatile) -+// -+// return true if opcode is one of the possible CompareAndSwapX -+// values otherwise false. -+bool is_CAS(int opcode, bool maybe_volatile) -+{ -+ switch (opcode) { -+ // We handle these -+ case Op_CompareAndSwapI: -+ case Op_CompareAndSwapL: -+ case Op_CompareAndSwapP: -+ case Op_CompareAndSwapN: -+ case Op_ShenandoahCompareAndSwapP: -+ case Op_ShenandoahCompareAndSwapN: -+ case Op_CompareAndSwapB: -+ case Op_CompareAndSwapS: -+ case Op_GetAndSetI: -+ case Op_GetAndSetL: -+ case Op_GetAndSetP: -+ case Op_GetAndSetN: -+ case Op_GetAndAddI: -+ case Op_GetAndAddL: -+ return true; -+ case Op_CompareAndExchangeI: -+ case Op_CompareAndExchangeN: -+ case Op_CompareAndExchangeB: -+ case Op_CompareAndExchangeS: -+ case Op_CompareAndExchangeL: -+ case Op_CompareAndExchangeP: -+ case Op_WeakCompareAndSwapB: -+ case Op_WeakCompareAndSwapS: -+ case Op_WeakCompareAndSwapI: -+ case Op_WeakCompareAndSwapL: -+ case Op_WeakCompareAndSwapP: -+ case Op_WeakCompareAndSwapN: -+ case Op_ShenandoahWeakCompareAndSwapP: -+ case Op_ShenandoahWeakCompareAndSwapN: -+ case Op_ShenandoahCompareAndExchangeP: -+ case Op_ShenandoahCompareAndExchangeN: -+ return maybe_volatile; -+ default: -+ return false; -+ } -+} ++ // some calls to generated routines (arraycopy code) are scheduled ++ // by C2 as runtime calls. if so we can call them using a jr (they ++ // will be in a reachable segment) otherwise we have to use a jalr ++ // which loads the absolute address into a register. 
++ address entry = (address)$meth$$method; ++ CodeBlob *cb = CodeCache::find_blob(entry); ++ if (cb != NULL) { ++ address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type)); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } else { ++ Label retaddr; ++ __ la(t1, retaddr); ++ __ la(t0, RuntimeAddress(entry)); ++ // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc() ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(t1, Address(sp, wordSize)); ++ __ jalr(t0); ++ __ bind(retaddr); ++ __ addi(sp, sp, 2 * wordSize); ++ } ++ %} + -+// predicate controlling translation of CAS -+// -+// returns true if CAS needs to use an acquiring load otherwise false -+bool needs_acquiring_load_reserved(const Node *n) -+{ -+ assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap"); ++ // using the cr register as the bool result: 0 for success; others failed. ++ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register flag = t1; ++ Register oop = as_Register($object$$reg); ++ Register box = as_Register($box$$reg); ++ Register disp_hdr = as_Register($tmp1$$reg); ++ Register tmp = as_Register($tmp2$$reg); ++ Label cont; ++ Label object_has_monitor; + -+ LoadStoreNode* ldst = n->as_LoadStore(); -+ if (n != NULL && is_CAS(n->Opcode(), false)) { -+ assert(ldst != NULL && ldst->trailing_membar() != NULL, "expected trailing membar"); -+ } else { -+ return ldst != NULL && ldst->trailing_membar() != NULL; -+ } -+ // so we can just return true here -+ return true; -+} -+#define __ _masm. ++ assert_different_registers(oop, box, tmp, disp_hdr, t0); + -+// advance declarations for helper functions to convert register -+// indices to register objects ++ // Load markWord from object into displaced_header. ++ __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + -+// the ad file has to provide implementations of certain methods -+// expected by the generic code -+// -+// REQUIRED FUNCTIONALITY ++ // Always do locking in runtime. ++ if (EmitSync & 0x01) { ++ __ mv(flag, 1); ++ return; ++ } + -+//============================================================================= ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag); ++ } + -+// !!!!! Special hack to get all types of calls to specify the byte offset -+// from the start of the call to the point where the return address -+// will point. ++ // Check for existing monitor ++ if ((EmitSync & 0x02) == 0) { ++ __ andi(t0, disp_hdr, markOopDesc::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ } + -+int MachCallStaticJavaNode::ret_addr_offset() -+{ -+ // jal -+ return 1 * NativeInstruction::instruction_size; -+} ++ // Set tmp to be (markWord of object | UNLOCK_VALUE). ++ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); + -+int MachCallDynamicJavaNode::ret_addr_offset() -+{ -+ return 7 * NativeInstruction::instruction_size; // movptr, jal -+} ++ // Initialize the box. (Must happen before we update the object mark!) 
++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + -+int MachCallRuntimeNode::ret_addr_offset() { -+ // for generated stubs the call will be -+ // jal(addr) -+ // or with far branches -+ // jal(trampoline_stub) -+ // for real runtime callouts it will be 11 instructions -+ // see riscv_enc_java_to_runtime -+ // la(t1, retaddr) -> auipc + addi -+ // la(t0, RuntimeAddress(addr)) -> lui + addi + slli + addi + slli + addi -+ // addi(sp, sp, -2 * wordSize) -> addi -+ // sd(t1, Address(sp, wordSize)) -> sd -+ // jalr(t0) -> jalr -+ CodeBlob *cb = CodeCache::find_blob(_entry_point); -+ if (cb != NULL) { -+ return 1 * NativeInstruction::instruction_size; -+ } else { -+ return 11 * NativeInstruction::instruction_size; -+ } -+} ++ // Compare object markWord with an unlocked value (tmp) and if ++ // equal exchange the stack address of our box with object markWord. ++ // On failure disp_hdr contains the possibly locked markWord. ++ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/disp_hdr); ++ __ mv(flag, zr); ++ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas + -+int MachCallNativeNode::ret_addr_offset() { -+ Unimplemented(); -+ return -1; -+} ++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + -+// -+// Compute padding required for nodes which need alignment -+// ++ // If the compare-and-exchange succeeded, then we found an unlocked ++ // object, will have now locked it will continue at label cont ++ // We did not see an unlocked object so try the fast recursive case. + -+// With RVC a call instruction may get 2-byte aligned. -+// The address of the call instruction needs to be 4-byte aligned to -+// ensure that it does not span a cache line so that it can be patched. -+int CallStaticJavaDirectNode::compute_padding(int current_offset) const -+{ -+ // to make sure the address of jal 4-byte aligned. -+ return align_up(current_offset, alignment_required()) - current_offset; -+} ++ // Check if the owner is self by comparing the value in the ++ // markWord of object (disp_hdr) with the stack pointer. ++ __ sub(disp_hdr, disp_hdr, sp); ++ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); ++ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, ++ // hence we can store 0 as the displaced header in the box, which indicates that it is a ++ // recursive lock. ++ __ andr(tmp/*==0?*/, disp_hdr, tmp); ++ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ __ mv(flag, tmp); // we can use the value of tmp as the result here + -+// With RVC a call instruction may get 2-byte aligned. -+// The address of the call instruction needs to be 4-byte aligned to -+// ensure that it does not span a cache line so that it can be patched. -+int CallDynamicJavaDirectNode::compute_padding(int current_offset) const -+{ -+ // skip the movptr in MacroAssembler::ic_call(): -+ // lui + addi + slli + addi + slli + addi -+ // Though movptr() has already 4-byte aligned with or without RVC, -+ // We need to prevent from further changes by explicitly calculating the size. -+ const int movptr_size = 6 * NativeInstruction::instruction_size; -+ current_offset += movptr_size; -+ // to make sure the address of jal 4-byte aligned. 
-+ return align_up(current_offset, alignment_required()) - current_offset; -+} ++ if ((EmitSync & 0x02) == 0) { ++ __ j(cont); + -+//============================================================================= ++ // Handle existing monitor. ++ __ bind(object_has_monitor); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ // ++ // Try to CAS m->owner from NULL to current thread. ++ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); ++ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) + -+#ifndef PRODUCT -+void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ assert_cond(st != NULL); -+ st->print("BREAKPOINT"); -+} -+#endif ++ // Store a non-null value into the box to avoid looking like a re-entrant ++ // lock. The fast-path monitor unlock code checks for ++ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the ++ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. ++ __ mv(tmp, (address)markOopDesc::unused_mark()); ++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ } + -+void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ C2_MacroAssembler _masm(&cbuf); -+ Assembler::CompressibleRegion cr(&_masm); -+ __ ebreak(); -+} ++ __ bind(cont); ++ %} + -+uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { -+ return MachNode::size(ra_); -+} ++ // using cr flag to indicate the fast_unlock result: 0 for success; others failed. ++ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register flag = t1; ++ Register oop = as_Register($object$$reg); ++ Register box = as_Register($box$$reg); ++ Register disp_hdr = as_Register($tmp1$$reg); ++ Register tmp = as_Register($tmp2$$reg); ++ Label cont; ++ Label object_has_monitor; + -+//============================================================================= ++ assert_different_registers(oop, box, tmp, disp_hdr, flag); + -+#ifndef PRODUCT -+ void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { -+ st->print("nop \t# %d bytes pad for loops and calls", _count); -+ } -+#endif ++ // Always do locking in runtime. ++ if (EmitSync & 0x01) { ++ __ mv(flag, 1); ++ return; ++ } + -+ void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { -+ C2_MacroAssembler _masm(&cbuf); -+ Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. -+ for (int i = 0; i < _count; i++) { -+ __ nop(); ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ __ biased_locking_exit(oop, tmp, cont, flag); + } -+ } + -+ uint MachNopNode::size(PhaseRegAlloc*) const { -+ return _count * (UseRVC ? NativeInstruction::compressed_instruction_size : NativeInstruction::instruction_size); -+ } ++ // Find the lock address and load the displaced header from the stack. ++ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + -+//============================================================================= -+const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; ++ // If the displaced header is 0, we have a recursive unlock. 
++ __ mv(flag, disp_hdr); ++ __ beqz(disp_hdr, cont); + -+int ConstantTable::calculate_table_base_offset() const { -+ return 0; // absolute addressing, no offset -+} ++ // Handle existing monitor. ++ if ((EmitSync & 0x02) == 0) { ++ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); ++ __ andi(t0, tmp, markOopDesc::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ } + -+bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } -+void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { -+ ShouldNotReachHere(); -+} ++ // Check if it is still a light weight lock, this is true if we ++ // see the stack address of the basicLock in the markWord of the ++ // object. + -+void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { -+ // Empty encoding -+} ++ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, ++ Assembler::rl, /*result*/tmp); ++ __ xorr(flag, box, tmp); // box == tmp if cas succeeds ++ __ j(cont); + -+uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { -+ return 0; -+} ++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + -+#ifndef PRODUCT -+void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { -+ assert_cond(st != NULL); -+ st->print("-- \t// MachConstantBaseNode (empty encoding)"); -+} -+#endif ++ // Handle existing monitor. ++ if ((EmitSync & 0x02) == 0) { ++ __ bind(object_has_monitor); ++ STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); ++ __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor ++ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); ++ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. ++ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions ++ __ bnez(flag, cont); + -+#ifndef PRODUCT -+void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ assert_cond(st != NULL && ra_ != NULL); -+ Compile* C = ra_->C; ++ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); ++ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. 
++ __ bnez(flag, cont); ++ // need a release store here ++ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sd(zr, Address(tmp)); // set unowned ++ } + -+ int framesize = C->output()->frame_slots() << LogBytesPerInt; ++ __ bind(cont); ++ %} + -+ if (C->output()->need_stack_bang(framesize)) { -+ st->print("# stack bang size=%d\n\t", framesize); -+ } ++ // arithmetic encodings + -+ st->print("sd fp, [sp, #%d]\n\t", - 2 * wordSize); -+ st->print("sd ra, [sp, #%d]\n\t", - wordSize); -+ if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } -+ st->print("sub sp, sp, #%d\n\t", framesize); ++ enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivl(dst_reg, src1_reg, src2_reg, false); ++ %} + -+ if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) { -+ st->print("ld t0, [guard]\n\t"); -+ st->print("membar LoadLoad\n\t"); -+ st->print("ld t1, [xthread, #thread_disarmed_offset]\n\t"); -+ st->print("beq t0, t1, skip\n\t"); -+ st->print("jalr #nmethod_entry_barrier_stub\n\t"); -+ st->print("j skip\n\t"); -+ st->print("guard: int\n\t"); -+ st->print("skip:\n\t"); -+ } -+} -+#endif ++ enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivq(dst_reg, src1_reg, src2_reg, false); ++ %} + -+void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ assert_cond(ra_ != NULL); -+ Compile* C = ra_->C; -+ C2_MacroAssembler _masm(&cbuf); ++ enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivl(dst_reg, src1_reg, src2_reg, true); ++ %} + -+ // n.b. 
frame size includes space for return pc and fp -+ const int framesize = C->output()->frame_size_in_bytes(); ++ enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivq(dst_reg, src1_reg, src2_reg, true); ++ %} + -+ // insert a nop at the start of the prolog so we can patch in a -+ // branch if we need to invalidate the method later -+ __ nop(); ++ enc_class riscv_enc_tail_call(iRegP jump_target) %{ ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ Register target_reg = as_Register($jump_target$$reg); ++ __ jr(target_reg); ++ %} + -+ assert_cond(C != NULL); ++ enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ Register target_reg = as_Register($jump_target$$reg); ++ // exception oop should be in x10 ++ // ret addr has been popped into ra ++ // callee expects it in x13 ++ __ mv(x13, ra); ++ __ jr(target_reg); ++ %} + -+ if (C->clinit_barrier_on_entry()) { -+ assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); ++ enc_class riscv_enc_rethrow() %{ ++ MacroAssembler _masm(&cbuf); ++ __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); ++ %} + -+ Label L_skip_barrier; ++ enc_class riscv_enc_ret() %{ ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ret(); ++ %} + -+ __ mov_metadata(t1, C->method()->holder()->constant_encoding()); -+ __ clinit_barrier(t1, t0, &L_skip_barrier); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -+ __ bind(L_skip_barrier); -+ } ++%} + -+ int bangsize = C->output()->bang_size_in_bytes(); -+ if (C->output()->need_stack_bang(bangsize)) { -+ __ generate_stack_overflow_check(bangsize); -+ } ++//----------FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add OptoReg::stack0()) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | | | 3 ++// | | +--------+ ++// V | | old out| Empty on Intel, window on Sparc ++// | old |preserve| Must be even aligned. ++// | SP-+--------+----> Matcher::_old_SP, even aligned ++// | | in | 3 area for Intel ret address ++// Owned by |preserve| Empty on Sparc. ++// SELF +--------+ ++// | | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> OptoReg::stack0(), even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by +--------+ ++// CALLEE | new out| 6 Empty on Intel, window on Sparc ++// | new |preserve| Must be even-aligned. ++// | SP-+--------+----> Matcher::_new_SP, even aligned ++// | | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. 
++// Region 6-7 is determined per call site.
++// Note 2: If the calling convention leaves holes in the incoming argument
++// area, those holes are owned by SELF. Holes in the outgoing area
++// are owned by the CALLEE. Holes should not be necessary in the
++// incoming area, as the Java calling convention is completely under
++// the control of the AD file. Doubles can be sorted and packed to
++// avoid holes. Holes in the outgoing arguments may be necessary for
++// varargs C calling conventions.
++// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
++// even aligned with pad0 as needed.
++// Region 6 is even aligned. Region 6-7 is NOT even aligned;
++// (the latter is true on Intel but is it false on RISCV?)
++// region 6-11 is even aligned; it may be padded out more so that
++// the region from SP to FP meets the minimum stack alignment.
++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
++// alignment. Region 11, pad1, may be dynamically extended so that
++// SP meets the minimum alignment.
+
-+ __ build_frame(framesize);
++frame %{
++ // What direction does stack grow in (assumed to be same for C & Java)
++ stack_direction(TOWARDS_LOW);
+
-+ if (C->stub_function() == NULL) {
-+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
-+ bs->nmethod_entry_barrier(&_masm);
-+ }
++ // These three registers define part of the calling convention
++ // between compiled code and the interpreter.
+
-+ if (VerifyStackAtCalls) {
-+ Unimplemented();
-+ }
++ // Inline Cache Register or methodOop for I2C.
++ inline_cache_reg(R31);
+
-+ C->output()->set_frame_complete(cbuf.insts_size());
++ // Method Oop Register when calling interpreter.
++ interpreter_method_oop_reg(R31);
+
-+ if (C->has_mach_constant_base_node()) {
-+ // NOTE: We set the table base offset here because users might be
-+ // emitted before MachConstantBaseNode.
-+ ConstantTable& constant_table = C->output()->constant_table();
-+ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
-+ }
-+}
++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
++ cisc_spilling_operand_name(indOffset);
+
-+uint MachPrologNode::size(PhaseRegAlloc* ra_) const
-+{
-+ assert_cond(ra_ != NULL);
-+ return MachNode::size(ra_); // too many variables; just compute it
-+ // the hard way
-+}
++ // Number of stack slots consumed by locking an object
++ // generate Compile::sync_stack_slots
++ // VMRegImpl::slots_per_word = wordSize / stack_slot_size = 8 / 4 = 2
++ sync_stack_slots(1 * VMRegImpl::slots_per_word);
+
-+int MachPrologNode::reloc() const
-+{
-+ return 0;
-+}
++ // Compiled code's Frame Pointer
++ frame_pointer(R2);
+
-+//=============================================================================
++ // Interpreter stores its frame pointer in a register which is
++ // stored to the stack by I2CAdaptors.
++ // I2CAdaptors convert from interpreted java to compiled java.
++ interpreter_frame_pointer(R8);
+
-+#ifndef PRODUCT
-+void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
-+ assert_cond(st != NULL && ra_ != NULL);
-+ Compile* C = ra_->C;
-+ assert_cond(C != NULL);
-+ int framesize = C->output()->frame_size_in_bytes();
++ // Stack alignment requirement
++ stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
+
-+ st->print("# pop frame %d\n\t", framesize);
++ // Number of stack slots between incoming argument block and the start of
++ // a new frame.
The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. RISC-V needs two slots for ++ // return address and fp. ++ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); + -+ if (framesize == 0) { -+ st->print("ld ra, [sp,#%d]\n\t", (2 * wordSize)); -+ st->print("ld fp, [sp,#%d]\n\t", (3 * wordSize)); -+ st->print("add sp, sp, #%d\n\t", (2 * wordSize)); -+ } else { -+ st->print("add sp, sp, #%d\n\t", framesize); -+ st->print("ld ra, [sp,#%d]\n\t", - 2 * wordSize); -+ st->print("ld fp, [sp,#%d]\n\t", - wordSize); -+ } ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); + -+ if (do_polling() && C->is_method_compilation()) { -+ st->print("# test polling word\n\t"); -+ st->print("ld t0, [xthread,#%d]\n\t", in_bytes(JavaThread::polling_word_offset())); -+ st->print("bgtu sp, t0, #slow_path"); -+ } -+} -+#endif ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. ++ // Otherwise, it is above the locks and verification slot and alignment word ++ // TODO this may well be correct but need to check why that - 2 is there ++ // ppc port uses 0 but we definitely need to allow for fixed_slots ++ // which folds in the space used for monitors ++ return_addr(STACK - 2 + ++ align_up((Compile::current()->in_preserve_stack_slots() + ++ Compile::current()->fixed_slots()), ++ stack_alignment_in_slots())); + -+void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ assert_cond(ra_ != NULL); -+ Compile* C = ra_->C; -+ C2_MacroAssembler _masm(&cbuf); -+ assert_cond(C != NULL); -+ int framesize = C->output()->frame_size_in_bytes(); ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. + -+ __ remove_frame(framesize); ++ calling_convention ++ %{ ++ // No difference between ingoing/outgoing just pass false ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} + -+ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { -+ __ reserved_stack_check(); -+ } ++ c_calling_convention ++ %{ ++ // This is obviously always outgoing ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length); ++ %} + -+ if (do_polling() && C->is_method_compilation()) { -+ Label dummy_label; -+ Label* code_stub = &dummy_label; -+ if (!C->output()->in_scratch_emit_size()) { -+ code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); -+ } -+ __ relocate(relocInfo::poll_return_type); -+ __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */); -+ } -+} ++ // Location of compiled Java return values. Same as C for now. 
++ return_value ++ %{ ++ assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, ++ "only return normal values"); + -+uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { -+ assert_cond(ra_ != NULL); -+ // Variable size. Determine dynamically. -+ return MachNode::size(ra_); -+} ++ static const int lo[Op_RegL + 1] = { // enum name ++ 0, // Op_Node ++ 0, // Op_Set ++ R10_num, // Op_RegN ++ R10_num, // Op_RegI ++ R10_num, // Op_RegP ++ F10_num, // Op_RegF ++ F10_num, // Op_RegD ++ R10_num // Op_RegL ++ }; + -+int MachEpilogNode::reloc() const { -+ // Return number of relocatable values contained in this instruction. -+ return 1; // 1 for polling page. -+} -+const Pipeline * MachEpilogNode::pipeline() const { -+ return MachNode::pipeline_class(); -+} ++ static const int hi[Op_RegL + 1] = { // enum name ++ 0, // Op_Node ++ 0, // Op_Set ++ OptoReg::Bad, // Op_RegN ++ OptoReg::Bad, // Op_RegI ++ R10_H_num, // Op_RegP ++ OptoReg::Bad, // Op_RegF ++ F10_H_num, // Op_RegD ++ R10_H_num // Op_RegL ++ }; + -+//============================================================================= ++ return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); ++ %} ++%} + -+// Figure out which register class each belongs in: rc_int, rc_float or -+// rc_stack. -+enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack }; ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(1); // Required cost attribute + -+static enum RC rc_class(OptoReg::Name reg) { ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_short_branch(0); // Required flag: is this instruction ++ // a non-matching short branch variant ++ // of some long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must ++ // be a power of 2) specifies the ++ // alignment that some part of the ++ // instruction (not necessarily the ++ // start) requires. If > 1, a ++ // compute_padding() function must be ++ // provided for the instruction + -+ if (reg == OptoReg::Bad) { -+ return rc_bad; -+ } ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. 
+ -+ // we have 30 int registers * 2 halves -+ // (t0 and t1 are omitted) -+ int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2); -+ if (reg < slots_of_int_registers) { -+ return rc_int; -+ } ++//----------Simple Operands---------------------------------------------------- + -+ // we have 32 float register * 2 halves -+ int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers; -+ if (reg < slots_of_int_registers + slots_of_float_registers) { -+ return rc_float; -+ } ++// Integer operands 32 bit ++// 32 bit immediate ++operand immI() ++%{ ++ match(ConI); + -+ // we have 32 vector register * 4 halves -+ int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers; -+ if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) { -+ return rc_vector; -+ } ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ // Between vector regs & stack is the flags regs. -+ assert(OptoReg::is_stack(reg), "blow up if spilling flags"); ++// 32 bit zero ++operand immI0() ++%{ ++ predicate(n->get_int() == 0); ++ match(ConI); + -+ return rc_stack; -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const { -+ assert_cond(ra_ != NULL); -+ Compile* C = ra_->C; ++// 32 bit unit increment ++operand immI_1() ++%{ ++ predicate(n->get_int() == 1); ++ match(ConI); + -+ // Get registers to move. -+ OptoReg::Name src_hi = ra_->get_reg_second(in(1)); -+ OptoReg::Name src_lo = ra_->get_reg_first(in(1)); -+ OptoReg::Name dst_hi = ra_->get_reg_second(this); -+ OptoReg::Name dst_lo = ra_->get_reg_first(this); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ enum RC src_hi_rc = rc_class(src_hi); -+ enum RC src_lo_rc = rc_class(src_lo); -+ enum RC dst_hi_rc = rc_class(dst_hi); -+ enum RC dst_lo_rc = rc_class(dst_lo); ++// 32 bit unit decrement ++operand immI_M1() ++%{ ++ predicate(n->get_int() == -1); ++ match(ConI); + -+ assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register"); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ if (src_hi != OptoReg::Bad) { -+ assert((src_lo & 1) == 0 && src_lo + 1 == src_hi && -+ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi, -+ "expected aligned-adjacent pairs"); -+ } ++// Unsigned Integer Immediate: 6-bit int, greater than 32 ++operand uimmI6_ge32() %{ ++ predicate(((unsigned int)(n->get_int()) < 64) && (n->get_int() >= 32)); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ if (src_lo == dst_lo && src_hi == dst_hi) { -+ return 0; // Self copy, no move. 
-+ } ++operand immI_le_4() ++%{ ++ predicate(n->get_int() <= 4); ++ match(ConI); + -+ bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi && -+ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi; -+ int src_offset = ra_->reg2offset(src_lo); -+ int dst_offset = ra_->reg2offset(dst_lo); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ if (bottom_type()->isa_vect() != NULL) { -+ uint ireg = ideal_reg(); -+ if (ireg == Op_VecA && cbuf) { -+ C2_MacroAssembler _masm(cbuf); -+ Assembler::CompressibleRegion cr(&_masm); -+ int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); -+ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { -+ // stack to stack -+ __ spill_copy_vector_stack_to_stack(src_offset, dst_offset, -+ vector_reg_size_in_bytes); -+ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { -+ // vpr to stack -+ __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo)); -+ } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { -+ // stack to vpr -+ __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); -+ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { -+ // vpr to vpr -+ __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+ } else if (cbuf != NULL) { -+ C2_MacroAssembler _masm(cbuf); -+ Assembler::CompressibleRegion cr(&_masm); -+ switch (src_lo_rc) { -+ case rc_int: -+ if (dst_lo_rc == rc_int) { // gpr --> gpr copy -+ if (!is64 && this->ideal_reg() != Op_RegI) { // zero extended for narrow oop or klass -+ __ zero_extend(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]), 32); -+ } else { -+ __ mv(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo])); -+ } -+ } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy -+ if (is64) { -+ __ fmv_d_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ as_Register(Matcher::_regEncode[src_lo])); -+ } else { -+ __ fmv_w_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ as_Register(Matcher::_regEncode[src_lo])); -+ } -+ } else { // gpr --> stack spill -+ assert(dst_lo_rc == rc_stack, "spill to bad register class"); -+ __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset); -+ } -+ break; -+ case rc_float: -+ if (dst_lo_rc == rc_int) { // fpr --> gpr copy -+ if (is64) { -+ __ fmv_x_d(as_Register(Matcher::_regEncode[dst_lo]), -+ as_FloatRegister(Matcher::_regEncode[src_lo])); -+ } else { -+ __ fmv_x_w(as_Register(Matcher::_regEncode[dst_lo]), -+ as_FloatRegister(Matcher::_regEncode[src_lo])); -+ } -+ } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy -+ if (is64) { -+ __ fmv_d(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ as_FloatRegister(Matcher::_regEncode[src_lo])); -+ } else { -+ __ fmv_s(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ as_FloatRegister(Matcher::_regEncode[src_lo])); -+ } -+ } else { // fpr --> stack spill -+ assert(dst_lo_rc == rc_stack, "spill to bad register class"); -+ __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), -+ is64, dst_offset); -+ } -+ break; -+ case rc_stack: -+ if (dst_lo_rc == rc_int) { // stack --> gpr load -+ if (this->ideal_reg() == Op_RegI) { -+ __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); -+ } else { // // zero extended for narrow oop or klass -+ __ unspillu(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); -+ } -+ } else if (dst_lo_rc == rc_float) { // stack --> 
fpr load -+ __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ is64, src_offset); -+ } else { // stack --> stack copy -+ assert(dst_lo_rc == rc_stack, "spill to bad register class"); -+ if (this->ideal_reg() == Op_RegI) { -+ __ unspill(t0, is64, src_offset); -+ } else { // zero extended for narrow oop or klass -+ __ unspillu(t0, is64, src_offset); -+ } -+ __ spill(t0, is64, dst_offset); -+ } -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ } ++operand immI_16() ++%{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ if (st != NULL) { -+ st->print("spill "); -+ if (src_lo_rc == rc_stack) { -+ st->print("[sp, #%d] -> ", src_offset); -+ } else { -+ st->print("%s -> ", Matcher::regName[src_lo]); -+ } -+ if (dst_lo_rc == rc_stack) { -+ st->print("[sp, #%d]", dst_offset); -+ } else { -+ st->print("%s", Matcher::regName[dst_lo]); -+ } -+ if (bottom_type()->isa_vect() != NULL) { -+ int vsize = 0; -+ if (ideal_reg() == Op_VecA) { -+ vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; -+ } else { -+ ShouldNotReachHere(); -+ } -+ st->print("\t# vector spill size = %d", vsize); -+ } else { -+ st->print("\t# spill size = %d", is64 ? 64 : 32); -+ } -+ } ++operand immI_24() ++%{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ return 0; -+} ++operand immI_31() ++%{ ++ predicate(n->get_int() == 31); ++ match(ConI); + -+#ifndef PRODUCT -+void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ if (ra_ == NULL) { -+ st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx); -+ } else { -+ implementation(NULL, ra_, false, st); -+ } -+} -+#endif ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ implementation(&cbuf, ra_, false, NULL); -+} ++operand immI_63() ++%{ ++ predicate(n->get_int() == 63); ++ match(ConI); + -+uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { -+ return MachNode::size(ra_); -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+//============================================================================= ++// 32 bit integer valid for add immediate ++operand immIAdd() ++%{ ++ predicate(Assembler::operand_valid_for_add_immediate((int64_t)n->get_int())); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+#ifndef PRODUCT -+void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ assert_cond(ra_ != NULL && st != NULL); -+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); -+ int reg = ra_->get_reg_first(this); -+ st->print("add %s, sp, #%d\t# box lock", -+ Matcher::regName[reg], offset); -+} -+#endif ++// 32 bit integer valid for sub immediate ++operand immISub() ++%{ ++ predicate(Assembler::operand_valid_for_add_immediate(-(int64_t)n->get_int())); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ C2_MacroAssembler _masm(&cbuf); ++// 5 bit signed value. 
++operand immI5() ++%{ ++ predicate(n->get_int() <= 15 && n->get_int() >= -16); ++ match(ConI); + -+ assert_cond(ra_ != NULL); -+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); -+ int reg = ra_->get_encode(this); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ if (is_imm_in_range(offset, 12, 0)) { -+ __ addi(as_Register(reg), sp, offset); -+ } else if (is_imm_in_range(offset, 32, 0)) { -+ __ li32(t0, offset); -+ __ add(as_Register(reg), sp, t0); -+ } else { -+ ShouldNotReachHere(); -+ } -+} ++// 5 bit signed value (simm5) ++operand immL5() ++%{ ++ predicate(n->get_long() <= 15 && n->get_long() >= -16); ++ match(ConL); + -+uint BoxLockNode::size(PhaseRegAlloc *ra_) const { -+ // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_). -+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ if (is_imm_in_range(offset, 12, 0)) { -+ return NativeInstruction::instruction_size; -+ } else { -+ return 3 * NativeInstruction::instruction_size; // lui + addiw + add; -+ } -+} ++// Integer operands 64 bit ++// 64 bit immediate ++operand immL() ++%{ ++ match(ConL); + -+//============================================================================= ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+#ifndef PRODUCT -+void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const -+{ -+ assert_cond(st != NULL); -+ st->print_cr("# MachUEPNode"); -+ if (UseCompressedClassPointers) { -+ st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); -+ if (CompressedKlassPointers::shift() != 0) { -+ st->print_cr("\tdecode_klass_not_null t0, t0"); -+ } -+ } else { -+ st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); -+ } -+ st->print_cr("\tbeq t0, t1, ic_hit"); -+ st->print_cr("\tj, SharedRuntime::_ic_miss_stub\t # Inline cache check"); -+ st->print_cr("\tic_hit:"); -+} -+#endif ++// 64 bit zero ++operand immL0() ++%{ ++ predicate(n->get_long() == 0); ++ match(ConL); + -+void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const -+{ -+ // This is the unverified entry point. -+ C2_MacroAssembler _masm(&cbuf); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ Label skip; -+ __ cmp_klass(j_rarg0, t1, t0, skip); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); -+ __ bind(skip); -+} ++// Pointer operands ++// Pointer Immediate ++operand immP() ++%{ ++ match(ConP); + -+uint MachUEPNode::size(PhaseRegAlloc* ra_) const -+{ -+ assert_cond(ra_ != NULL); -+ return MachNode::size(ra_); -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+// REQUIRED EMIT CODE ++// NULL Pointer Immediate ++operand immP0() ++%{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); + -+//============================================================================= ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+// Emit exception handler code. -+int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) -+{ -+ // la_patchable t0, #exception_blob_entry_point -+ // jr (offset)t0 -+ // or -+ // j #exception_blob_entry_point -+ // Note that the code buffer's insts_mark is always relative to insts. -+ // That's why we must use the macroassembler to generate a handler. 
-+ C2_MacroAssembler _masm(&cbuf); -+ address base = __ start_a_stub(size_exception_handler()); -+ if (base == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return 0; // CodeBuffer::expand failed -+ } -+ int offset = __ offset(); -+ __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); -+ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); -+ __ end_a_stub(); -+ return offset; -+} ++// Pointer Immediate One ++// this is used in object initialization (initial object header) ++operand immP_1() ++%{ ++ predicate(n->get_ptr() == 1); ++ match(ConP); + -+// Emit deopt handler code. -+int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) -+{ -+ // Note that the code buffer's insts_mark is always relative to insts. -+ // That's why we must use the macroassembler to generate a handler. -+ C2_MacroAssembler _masm(&cbuf); -+ address base = __ start_a_stub(size_deopt_handler()); -+ if (base == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return 0; // CodeBuffer::expand failed -+ } -+ int offset = __ offset(); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ __ auipc(ra, 0); -+ __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); ++// Polling Page Pointer Immediate ++operand immPollPage() ++%{ ++ predicate((address)n->get_ptr() == os::get_polling_page()); ++ match(ConP); + -+ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); -+ __ end_a_stub(); -+ return offset; ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+} -+// REQUIRED MATCHER CODE ++// Card Table Byte Map Base ++operand immByteMapBase() ++%{ ++ // Get base of card map ++ predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && ++ (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); ++ match(ConP); + -+//============================================================================= ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+const bool Matcher::match_rule_supported(int opcode) { -+ if (!has_match_rule(opcode)) { -+ return false; -+ } ++// Int Immediate: low 16-bit mask ++operand immI_16bits() ++%{ ++ predicate(n->get_int() == 0xFFFF); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ switch (opcode) { -+ case Op_CacheWB: // fall through -+ case Op_CacheWBPreSync: // fall through -+ case Op_CacheWBPostSync: -+ if (!VM_Version::supports_data_cache_line_flush()) { -+ return false; -+ } -+ break; ++operand immIpowerOf2() %{ ++ predicate(is_power_of_2((juint)(n->get_int()))); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ case Op_StrCompressedCopy: // fall through -+ case Op_StrInflatedCopy: // fall through -+ case Op_CountPositives: -+ return UseRVV; ++// Long Immediate: low 32-bit mask ++operand immL_32bits() ++%{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ case Op_EncodeISOArray: -+ return UseRVV && SpecialEncodeISOArray; ++// 64 bit unit decrement ++operand immL_M1() ++%{ ++ predicate(n->get_long() == -1); ++ match(ConL); + -+ case Op_PopCountI: -+ case Op_PopCountL: -+ return UsePopCountInstruction; ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ case Op_RotateRight: -+ case Op_RotateLeft: -+ case Op_CountLeadingZerosI: -+ case Op_CountLeadingZerosL: -+ case Op_CountTrailingZerosI: -+ case Op_CountTrailingZerosL: 
-+ return UseRVB; -+ } + -+ return true; // Per default match rules are supported. -+} ++// 32 bit offset of pc in thread anchor + -+// Identify extra cases that we might want to provide match rules for vector nodes and -+// other intrinsics guarded with vector length (vlen) and element type (bt). -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } ++operand immL_pc_off() ++%{ ++ predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) + ++ in_bytes(JavaFrameAnchor::last_Java_pc_offset())); ++ match(ConL); + -+ return op_vec_supported(opcode); -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { -+ return false; -+} ++// 64 bit integer valid for add immediate ++operand immLAdd() ++%{ ++ predicate(Assembler::operand_valid_for_add_immediate(n->get_long())); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+const RegMask* Matcher::predicate_reg_mask(void) { -+ return NULL; -+} ++// 64 bit integer valid for sub immediate ++operand immLSub() ++%{ ++ predicate(Assembler::operand_valid_for_add_immediate(-(n->get_long()))); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { -+ return NULL; -+} ++// Narrow pointer operands ++// Narrow Pointer Immediate ++operand immN() ++%{ ++ match(ConN); + -+// Vector calling convention not yet implemented. -+const bool Matcher::supports_vector_calling_convention(void) { -+ return false; -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+OptoRegPair Matcher::vector_return_value(uint ideal_reg) { -+ Unimplemented(); -+ return OptoRegPair(0, 0); -+} ++// Narrow NULL Pointer Immediate ++operand immN0() ++%{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); + -+// Is this branch offset short enough that a short branch can be used? -+// -+// NOTE: If the platform does not provide any short branch variants, then -+// this method should return false for offset 0. -+// |---label(L1)-----| -+// |-----------------| -+// |-----------------|----------eq: float------------------- -+// |-----------------| // far_cmpD_branch | cmpD_branch -+// |------- ---------| feq; | feq; -+// |-far_cmpD_branch-| beqz done; | bnez L; -+// |-----------------| j L; | -+// |-----------------| bind(done); | -+// |-----------------|-------------------------------------- -+// |-----------------| // so shortBrSize = br_size - 4; -+// |-----------------| // so offs = offset - shortBrSize + 4; -+// |---label(L2)-----| -+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { -+ // The passed offset is relative to address of the branch. -+ int shortBrSize = br_size - 4; -+ int offs = offset - shortBrSize + 4; -+ return (-4096 <= offs && offs < 4096); -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+// Vector width in bytes. -+const int Matcher::vector_width_in_bytes(BasicType bt) { -+ if (UseRVV) { -+ // The MaxVectorSize should have been set by detecting RVV max vector register size when check UseRVV. -+ // MaxVectorSize == VM_Version::_initial_vector_length -+ return MaxVectorSize; -+ } -+ return 0; -+} ++operand immNKlass() ++%{ ++ match(ConNKlass); + -+// Limits on vector size (number of elements) loaded into vector. 
-+const int Matcher::max_vector_size(const BasicType bt) { -+ return vector_width_in_bytes(bt) / type2aelembytes(bt); -+} -+const int Matcher::min_vector_size(const BasicType bt) { -+ return max_vector_size(bt); -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+// Vector ideal reg. -+const uint Matcher::vector_ideal_reg(int len) { -+ assert(MaxVectorSize >= len, ""); -+ if (UseRVV) { -+ return Op_VecA; -+ } ++// Float and Double operands ++// Double Immediate ++operand immD() ++%{ ++ match(ConD); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ShouldNotReachHere(); -+ return 0; -+} ++// Double Immediate: +0.0d ++operand immD0() ++%{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); + -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return Matcher::max_vector_size(bt); -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) { -+ ShouldNotReachHere(); // generic vector operands not supported -+ return NULL; -+} ++// Float Immediate ++operand immF() ++%{ ++ match(ConF); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+bool Matcher::is_reg2reg_move(MachNode* m) { -+ ShouldNotReachHere(); // generic vector operands not supported -+ return false; -+} ++// Float Immediate: +0.0f. ++operand immF0() ++%{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); + -+bool Matcher::is_generic_vector(MachOper* opnd) { -+ ShouldNotReachHere(); // generic vector operands not supported -+ return false; -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+// Return whether or not this register is ever used as an argument. -+// This function is used on startup to build the trampoline stubs in -+// generateOptoStub. Registers not mentioned will be killed by the VM -+// call in the trampoline, and arguments in those registers not be -+// available to the callee. -+bool Matcher::can_be_java_arg(int reg) -+{ -+ return -+ reg == R10_num || reg == R10_H_num || -+ reg == R11_num || reg == R11_H_num || -+ reg == R12_num || reg == R12_H_num || -+ reg == R13_num || reg == R13_H_num || -+ reg == R14_num || reg == R14_H_num || -+ reg == R15_num || reg == R15_H_num || -+ reg == R16_num || reg == R16_H_num || -+ reg == R17_num || reg == R17_H_num || -+ reg == F10_num || reg == F10_H_num || -+ reg == F11_num || reg == F11_H_num || -+ reg == F12_num || reg == F12_H_num || -+ reg == F13_num || reg == F13_H_num || -+ reg == F14_num || reg == F14_H_num || -+ reg == F15_num || reg == F15_H_num || -+ reg == F16_num || reg == F16_H_num || -+ reg == F17_num || reg == F17_H_num; -+} ++operand immIOffset() ++%{ ++ predicate(is_imm_in_range(n->get_int(), 12, 0)); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+bool Matcher::is_spillable_arg(int reg) -+{ -+ return can_be_java_arg(reg); -+} ++operand immLOffset() ++%{ ++ predicate(is_imm_in_range(n->get_long(), 12, 0)); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+uint Matcher::int_pressure_limit() -+{ -+ // A derived pointer is live at CallNode and then is flagged by RA -+ // as a spilled LRG. Spilling heuristics(Spill-USE) explicitly skip -+ // derived pointers and lastly fail to spill after reaching maximum -+ // number of iterations. 
Lowering the default pressure threshold to -+ // (_NO_SPECIAL_REG32_mask.Size() minus 1) forces CallNode to become -+ // a high register pressure area of the code so that split_DEF can -+ // generate DefinitionSpillCopy for the derived pointer. -+ uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.Size() - 1; -+ if (!PreserveFramePointer) { -+ // When PreserveFramePointer is off, frame pointer is allocatable, -+ // but different from other SOC registers, it is excluded from -+ // fatproj's mask because its save type is No-Save. Decrease 1 to -+ // ensure high pressure at fatproj when PreserveFramePointer is off. -+ // See check_pressure_at_fatproj(). -+ default_int_pressure_threshold--; -+ } -+ return (INTPRESSURE == -1) ? default_int_pressure_threshold : INTPRESSURE; -+} -+ -+uint Matcher::float_pressure_limit() -+{ -+ // _FLOAT_REG_mask is generated by adlc from the float_reg register class. -+ return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.Size() : FLOATPRESSURE; -+} ++// Scale values ++operand immIScale() ++%{ ++ predicate(1 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); + -+bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { -+ return false; -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+RegMask Matcher::divI_proj_mask() { -+ ShouldNotReachHere(); -+ return RegMask(); -+} ++// Integer 32 bit Register Operands ++operand iRegI() ++%{ ++ constraint(ALLOC_IN_RC(any_reg32)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+// Register for MODI projection of divmodI. -+RegMask Matcher::modI_proj_mask() { -+ ShouldNotReachHere(); -+ return RegMask(); -+} ++// Integer 32 bit Register not Special ++operand iRegINoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_reg32)); ++ match(RegI); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+// Register for DIVL projection of divmodL. -+RegMask Matcher::divL_proj_mask() { -+ ShouldNotReachHere(); -+ return RegMask(); -+} ++// Register R10 only ++operand iRegI_R10() ++%{ ++ constraint(ALLOC_IN_RC(int_r10_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+// Register for MODL projection of divmodL. -+RegMask Matcher::modL_proj_mask() { -+ ShouldNotReachHere(); -+ return RegMask(); -+} ++// Register R12 only ++operand iRegI_R12() ++%{ ++ constraint(ALLOC_IN_RC(int_r12_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+const RegMask Matcher::method_handle_invoke_SP_save_mask() { -+ return FP_REG_mask(); -+} ++// Register R13 only ++operand iRegI_R13() ++%{ ++ constraint(ALLOC_IN_RC(int_r13_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+bool size_fits_all_mem_uses(AddPNode* addp, int shift) { -+ assert_cond(addp != NULL); -+ for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) { -+ Node* u = addp->fast_out(i); -+ if (u != NULL && u->is_Mem()) { -+ int opsize = u->as_Mem()->memory_size(); -+ assert(opsize > 0, "unexpected memory operand size"); -+ if (u->as_Mem()->memory_size() != (1 << shift)) { -+ return false; -+ } -+ } -+ } -+ return true; -+} ++// Register R14 only ++operand iRegI_R14() ++%{ ++ constraint(ALLOC_IN_RC(int_r14_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+// Should the Matcher clone input 'm' of node 'n'? 
-+bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { -+ assert_cond(m != NULL); -+ if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) -+ mstack.push(m, Visit); // m = ShiftCntV -+ return true; -+ } -+ return false; -+} ++// Integer 64 bit Register Operands ++operand iRegL() ++%{ ++ constraint(ALLOC_IN_RC(any_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+// Should the Matcher clone shifts on addressing modes, expecting them -+// to be subsumed into complex addressing expressions or compute them -+// into registers? -+bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { -+ return clone_base_plus_offset_address(m, mstack, address_visited); -+} ++// Integer 64 bit Register not Special ++operand iRegLNoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_reg)); ++ match(RegL); ++ match(iRegL_R10); ++ format %{ %} ++ interface(REG_INTER); ++%} + ++// Long 64 bit Register R28 only ++operand iRegL_R28() ++%{ ++ constraint(ALLOC_IN_RC(r28_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + ++// Long 64 bit Register R29 only ++operand iRegL_R29() ++%{ ++ constraint(ALLOC_IN_RC(r29_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + ++// Long 64 bit Register R30 only ++operand iRegL_R30() ++%{ ++ constraint(ALLOC_IN_RC(r30_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+//----------ENCODING BLOCK----------------------------------------------------- -+// This block specifies the encoding classes used by the compiler to -+// output byte streams. Encoding classes are parameterized macros -+// used by Machine Instruction Nodes in order to generate the bit -+// encoding of the instruction. Operands specify their base encoding -+// interface with the interface keyword. There are currently -+// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & -+// COND_INTER. REG_INTER causes an operand to generate a function -+// which returns its register number when queried. CONST_INTER causes -+// an operand to generate a function which returns the value of the -+// constant when queried. MEMORY_INTER causes an operand to generate -+// four functions which return the Base Register, the Index Register, -+// the Scale Value, and the Offset Value of the operand when queried. -+// COND_INTER causes an operand to generate six functions which return -+// the encoding code (ie - encoding bits for the instruction) -+// associated with each basic boolean condition for a conditional -+// instruction. -+// -+// Instructions specify two basic values for encoding. Again, a -+// function is available to check if the constant displacement is an -+// oop. They use the ins_encode keyword to specify their encoding -+// classes (which must be a sequence of enc_class names, and their -+// parameters, specified in the encoding block), and they use the -+// opcode keyword to specify, in order, their primary, secondary, and -+// tertiary opcode. Only the opcode sections which a particular -+// instruction needs for encoding need to be specified. 
-+encode %{ -+ // BEGIN Non-volatile memory access ++// Pointer Register Operands ++// Pointer Register ++operand iRegP() ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ match(iRegP_R10); ++ match(javaThread_RegP); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Assembler::CompressibleRegion cr(&_masm); -+ int64_t con = (int64_t)$src$$constant; -+ Register dst_reg = as_Register($dst$$reg); -+ __ li(dst_reg, con); -+ %} ++// Pointer 64 bit Register not Special ++operand iRegPNoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_ptr_reg)); ++ match(RegP); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ address con = (address)$src$$constant; -+ if (con == NULL || con == (address)1) { -+ ShouldNotReachHere(); -+ } else { -+ relocInfo::relocType rtype = $src->constant_reloc(); -+ if (rtype == relocInfo::oop_type) { -+ __ movoop(dst_reg, (jobject)con, /*immediate*/true); -+ } else if (rtype == relocInfo::metadata_type) { -+ __ mov_metadata(dst_reg, (Metadata*)con); -+ } else { -+ assert(rtype == relocInfo::none, "unexpected reloc type"); -+ __ li(dst_reg, $src$$constant); -+ } -+ } -+ %} ++operand iRegP_R10() ++%{ ++ constraint(ALLOC_IN_RC(r10_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ enc_class riscv_enc_mov_p1(iRegP dst) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Assembler::CompressibleRegion cr(&_masm); -+ Register dst_reg = as_Register($dst$$reg); -+ __ li(dst_reg, 1); -+ %} ++// Pointer 64 bit Register R11 only ++operand iRegP_R11() ++%{ ++ constraint(ALLOC_IN_RC(r11_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ __ load_byte_map_base($dst$$Register); -+ %} ++operand iRegP_R12() ++%{ ++ constraint(ALLOC_IN_RC(r12_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ address con = (address)$src$$constant; -+ if (con == NULL) { -+ ShouldNotReachHere(); -+ } else { -+ relocInfo::relocType rtype = $src->constant_reloc(); -+ assert(rtype == relocInfo::oop_type, "unexpected reloc type"); -+ __ set_narrow_oop(dst_reg, (jobject)con); -+ } -+ %} ++// Pointer 64 bit Register R13 only ++operand iRegP_R13() ++%{ ++ constraint(ALLOC_IN_RC(r13_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ enc_class riscv_enc_mov_zero(iRegNorP dst) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ __ mv(dst_reg, zr); -+ %} ++operand iRegP_R14() ++%{ ++ constraint(ALLOC_IN_RC(r14_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ address con = (address)$src$$constant; -+ if (con == NULL) { -+ ShouldNotReachHere(); -+ } else { -+ relocInfo::relocType rtype = $src->constant_reloc(); 
-+ assert(rtype == relocInfo::metadata_type, "unexpected reloc type"); -+ __ set_narrow_klass(dst_reg, (Klass *)con); -+ } ++operand iRegP_R15() ++%{ ++ constraint(ALLOC_IN_RC(r15_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand iRegP_R16() ++%{ ++ constraint(ALLOC_IN_RC(r16_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer 64 bit Register R28 only ++operand iRegP_R28() ++%{ ++ constraint(ALLOC_IN_RC(r28_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register Operands ++// Narrow Pointer Register ++operand iRegN() ++%{ ++ constraint(ALLOC_IN_RC(any_reg32)); ++ match(RegN); ++ match(iRegNNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Integer 64 bit Register not Special ++operand iRegNNoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_reg32)); ++ match(RegN); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// heap base register -- used for encoding immN0 ++operand iRegIHeapbase() ++%{ ++ constraint(ALLOC_IN_RC(heapbase_reg)); ++ match(RegI); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Long 64 bit Register R10 only ++operand iRegL_R10() ++%{ ++ constraint(ALLOC_IN_RC(r10_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Float Register ++// Float register operands ++operand fRegF() ++%{ ++ constraint(ALLOC_IN_RC(float_reg)); ++ match(RegF); ++ ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Double Register ++// Double register operands ++operand fRegD() ++%{ ++ constraint(ALLOC_IN_RC(double_reg)); ++ match(RegD); ++ ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Java Thread Register ++operand javaThread_RegP(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(java_thread_reg)); // java_thread_reg ++ match(reg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// RISCV has only base_plus_offset and literal address mode, so no need to use ++// index and scale. Here set index as 0xffffffff and scale as 0x0. 
++operand indirect(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(reg); ++ op_cost(0); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); + %} ++%} + -+ enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); ++operand indOffI(iRegP reg, immIOffset off) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} ++%} + -+ enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); ++operand indOffL(iRegP reg, immLOffset off) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} ++%} + -+ enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); ++operand indirectN(iRegN reg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(DecodeN reg); ++ op_cost(0); ++ format %{ "[$reg]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); + %} ++%} + -+ enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); ++operand indOffIN(iRegN reg, immIOffset off) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (DecodeN reg) off); ++ op_cost(0); ++ format %{ "[$reg, $off]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} ++%} + -+ enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); ++operand indOffLN(iRegN reg, immLOffset off) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (DecodeN reg) off); ++ op_cost(0); ++ format %{ "[$reg, $off]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} ++%} + -+ enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ 
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); ++// RISCV opto stubs need to write to the pc slot in the thread anchor ++operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} ++%} + -+ // compare and branch instruction encodings + -+ enc_class riscv_enc_j(label lbl) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Label* L = $lbl$$label; -+ __ j(*L); ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. ++operand stackSlotI(sRegI reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegI); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset + %} ++%} + -+ enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Label* L = $lbl$$label; -+ switch ($cmp$$cmpcode) { -+ case(BoolTest::ge): -+ __ j(*L); -+ break; -+ case(BoolTest::lt): -+ break; -+ default: -+ Unimplemented(); -+ } ++operand stackSlotF(sRegF reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegF); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset + %} ++%} + -+ // call instruction encodings -+ -+ enc_class riscv_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) %{ -+ Register sub_reg = as_Register($sub$$reg); -+ Register super_reg = as_Register($super$$reg); -+ Register temp_reg = as_Register($temp$$reg); -+ Register result_reg = as_Register($result$$reg); -+ Register cr_reg = t1; ++operand stackSlotD(sRegD reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegD); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} + -+ Label miss; -+ Label done; -+ C2_MacroAssembler _masm(&cbuf); -+ __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, -+ NULL, &miss); -+ if ($primary) { -+ __ mv(result_reg, zr); -+ } else { -+ __ mv(cr_reg, zr); -+ __ j(done); -+ } ++operand stackSlotL(sRegL reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegL); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} + -+ __ bind(miss); -+ if (!$primary) { -+ __ li(cr_reg, 1); -+ } ++// Special operand allowing long args to int ops to be truncated for free + -+ __ bind(done); -+ %} ++operand iRegL2I(iRegL reg) %{ + -+ enc_class riscv_enc_java_static_call(method meth) %{ -+ C2_MacroAssembler 
_masm(&cbuf); ++ op_cost(0); + -+ address addr = (address)$meth$$method; -+ address call = NULL; -+ assert_cond(addr != NULL); -+ if (!_method) { -+ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. -+ call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf); -+ if (call == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } -+ } else { -+ int method_index = resolved_method_index(cbuf); -+ RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) -+ : static_call_Relocation::spec(method_index); -+ call = __ trampoline_call(Address(addr, rspec), &cbuf); -+ if (call == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ match(ConvL2I reg); + -+ // Emit stub for static call -+ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); -+ if (stub == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } -+ } -+ %} ++ format %{ "l2i($reg)" %} + -+ enc_class riscv_enc_java_dynamic_call(method meth) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ int method_index = resolved_method_index(cbuf); -+ address call = __ ic_call((address)$meth$$method, method_index); -+ if (call == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } -+ %} ++ interface(REG_INTER) ++%} + -+ enc_class riscv_enc_call_epilog() %{ -+ C2_MacroAssembler _masm(&cbuf); -+ if (VerifyStackAtCalls) { -+ // Check that stack depth is unchanged: find majik cookie on stack -+ __ call_Unimplemented(); -+ } -+ %} + -+ enc_class riscv_enc_java_to_runtime(method meth) %{ -+ C2_MacroAssembler _masm(&cbuf); ++// Comparison Operands ++// NOTE: Label is a predefined operand which should not be redefined in ++// the AD file. It is generically handled within the ADLC. + -+ // some calls to generated routines (arraycopy code) are scheduled -+ // by C2 as runtime calls. if so we can call them using a jr (they -+ // will be in a reachable segment) otherwise we have to use a jalr -+ // which loads the absolute address into a register. -+ address entry = (address)$meth$$method; -+ CodeBlob *cb = CodeCache::find_blob(entry); -+ if (cb != NULL) { -+ address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type)); -+ if (call == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } -+ } else { -+ Label retaddr; -+ __ la(t1, retaddr); -+ __ la(t0, RuntimeAddress(entry)); -+ // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc() -+ __ addi(sp, sp, -2 * wordSize); -+ __ sd(t1, Address(sp, wordSize)); -+ __ jalr(t0); -+ __ bind(retaddr); -+ __ addi(sp, sp, 2 * wordSize); -+ } -+ %} -+ -+ // using the cr register as the bool result: 0 for success; others failed. -+ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register flag = t1; -+ Register oop = as_Register($object$$reg); -+ Register box = as_Register($box$$reg); -+ Register disp_hdr = as_Register($tmp1$$reg); -+ Register tmp = as_Register($tmp2$$reg); -+ Label cont; -+ Label object_has_monitor; -+ -+ assert_different_registers(oop, box, tmp, disp_hdr, t0); -+ -+ // Load markWord from object into displaced_header. 
-+ __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); -+ -+ if (DiagnoseSyncOnValueBasedClasses != 0) { -+ __ load_klass(flag, oop); -+ __ lwu(flag, Address(flag, Klass::access_flags_offset())); -+ __ andi(flag, flag, JVM_ACC_IS_VALUE_BASED_CLASS, tmp /* tmp */); -+ __ bnez(flag, cont, true /* is_far */); -+ } -+ -+ // Check for existing monitor -+ __ andi(t0, disp_hdr, markWord::monitor_value); -+ __ bnez(t0, object_has_monitor); -+ -+ if (!UseHeavyMonitors) { -+ // Set tmp to be (markWord of object | UNLOCK_VALUE). -+ __ ori(tmp, disp_hdr, markWord::unlocked_value); -+ -+ // Initialize the box. (Must happen before we update the object mark!) -+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ -+ // Compare object markWord with an unlocked value (tmp) and if -+ // equal exchange the stack address of our box with object markWord. -+ // On failure disp_hdr contains the possibly locked markWord. -+ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/disp_hdr); -+ __ mv(flag, zr); -+ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas -+ -+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -+ -+ // If the compare-and-exchange succeeded, then we found an unlocked -+ // object, will have now locked it will continue at label cont -+ // We did not see an unlocked object so try the fast recursive case. -+ -+ // Check if the owner is self by comparing the value in the -+ // markWord of object (disp_hdr) with the stack pointer. -+ __ sub(disp_hdr, disp_hdr, sp); -+ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place)); -+ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, -+ // hence we can store 0 as the displaced header in the box, which indicates that it is a -+ // recursive lock. -+ __ andr(tmp/*==0?*/, disp_hdr, tmp); -+ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ __ mv(flag, tmp); // we can use the value of tmp as the result here -+ } else { -+ __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path -+ } -+ -+ __ j(cont); -+ -+ // Handle existing monitor. -+ __ bind(object_has_monitor); -+ // The object's monitor m is unlocked iff m->owner == NULL, -+ // otherwise m->owner may contain a thread or a stack address. -+ // -+ // Try to CAS m->owner from NULL to current thread. -+ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markWord::monitor_value)); -+ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) -+ -+ // Store a non-null value into the box to avoid looking like a re-entrant -+ // lock. The fast-path monitor unlock code checks for -+ // markWord::monitor_value so use markWord::unused_mark which has the -+ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. 
-+ __ mv(tmp, (address)markWord::unused_mark().value()); -+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. + -+ __ beqz(flag, cont); // CAS success means locking succeeded + -+ __ bne(flag, xthread, cont); // Check for recursive locking ++// used for signed integral comparisons and fp comparisons ++operand cmpOp() ++%{ ++ match(Bool); + -+ // Recursive lock case -+ __ mv(flag, zr); -+ __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); -+ __ add(tmp, tmp, 1u); -+ __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); ++ format %{ "" %} + -+ __ bind(cont); ++ // the values in interface derives from struct BoolTest::mask ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); + %} ++%} + -+ // using cr flag to indicate the fast_unlock result: 0 for success; others failed. -+ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register flag = t1; -+ Register oop = as_Register($object$$reg); -+ Register box = as_Register($box$$reg); -+ Register disp_hdr = as_Register($tmp1$$reg); -+ Register tmp = as_Register($tmp2$$reg); -+ Label cont; -+ Label object_has_monitor; -+ -+ assert_different_registers(oop, box, tmp, disp_hdr, flag); -+ -+ if (!UseHeavyMonitors) { -+ // Find the lock address and load the displaced header from the stack. -+ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ -+ // If the displaced header is 0, we have a recursive unlock. -+ __ mv(flag, disp_hdr); -+ __ beqz(disp_hdr, cont); -+ } -+ -+ // Handle existing monitor. -+ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); -+ __ andi(t0, disp_hdr, markWord::monitor_value); -+ __ bnez(t0, object_has_monitor); -+ -+ if (!UseHeavyMonitors) { -+ // Check if it is still a light weight lock, this is true if we -+ // see the stack address of the basicLock in the markWord of the -+ // object. -+ -+ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, -+ Assembler::rl, /*result*/tmp); -+ __ xorr(flag, box, tmp); // box == tmp if cas succeeds -+ } else { -+ __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path -+ } -+ __ j(cont); -+ -+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -+ -+ // Handle existing monitor. 
-+ __ bind(object_has_monitor); -+ STATIC_ASSERT(markWord::monitor_value <= INT_MAX); -+ __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -+ -+ Label notRecursive; -+ __ beqz(disp_hdr, notRecursive); // Will be 0 if not recursive. -+ -+ // Recursive lock -+ __ addi(disp_hdr, disp_hdr, -1); -+ __ sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -+ __ mv(flag, zr); -+ __ j(cont); -+ -+ __ bind(notRecursive); -+ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); -+ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. -+ __ bnez(flag, cont); -+ // need a release store here -+ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sd(zr, Address(tmp)); // set unowned ++// used for unsigned integral comparisons ++operand cmpOpU() ++%{ ++ match(Bool); + -+ __ bind(cont); ++ format %{ "" %} ++ // the values in interface derives from struct BoolTest::mask ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gtu"); ++ overflow(0x2, "overflow"); ++ less(0x3, "ltu"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "leu"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "geu"); + %} ++%} + -+ // arithmetic encodings -+ -+ enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivl(dst_reg, src1_reg, src2_reg, false); -+ %} ++// used for certain integral comparisons which can be ++// converted to bxx instructions ++operand cmpOpEqNe() ++%{ ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne || ++ n->as_Bool()->_test._test == BoolTest::eq); + -+ enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivq(dst_reg, src1_reg, src2_reg, false); ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); + %} ++%} + -+ enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivl(dst_reg, src1_reg, src2_reg, true); -+ %} ++operand cmpOpULtGe() ++%{ ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::lt || ++ n->as_Bool()->_test._test == BoolTest::ge); + -+ enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivq(dst_reg, src1_reg, src2_reg, true); ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gtu"); ++ overflow(0x2, "overflow"); ++ less(0x3, "ltu"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "leu"); ++ 
no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "geu"); + %} ++%} + -+ enc_class riscv_enc_tail_call(iRegP jump_target) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Assembler::CompressibleRegion cr(&_masm); -+ Register target_reg = as_Register($jump_target$$reg); -+ __ jr(target_reg); -+ %} ++operand cmpOpUEqNeLeGt() ++%{ ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne || ++ n->as_Bool()->_test._test == BoolTest::eq || ++ n->as_Bool()->_test._test == BoolTest::le || ++ n->as_Bool()->_test._test == BoolTest::gt); + -+ enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Assembler::CompressibleRegion cr(&_masm); -+ Register target_reg = as_Register($jump_target$$reg); -+ // exception oop should be in x10 -+ // ret addr has been popped into ra -+ // callee expects it in x13 -+ __ mv(x13, ra); -+ __ jr(target_reg); ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gtu"); ++ overflow(0x2, "overflow"); ++ less(0x3, "ltu"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "leu"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "geu"); + %} ++%} + -+ enc_class riscv_enc_rethrow() %{ -+ C2_MacroAssembler _masm(&cbuf); -+ __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); -+ %} + -+ enc_class riscv_enc_ret() %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Assembler::CompressibleRegion cr(&_masm); -+ __ ret(); -+ %} ++// Flags register, used as output of compare logic ++operand rFlagsReg() ++%{ ++ constraint(ALLOC_IN_RC(reg_flags)); ++ match(RegFlags); + ++ op_cost(0); ++ format %{ "RFLAGS" %} ++ interface(REG_INTER); +%} + -+//----------FRAME-------------------------------------------------------------- -+// Definition of frame structure and management information. -+// -+// S T A C K L A Y O U T Allocators stack-slot number -+// | (to get allocators register number -+// G Owned by | | v add OptoReg::stack0()) -+// r CALLER | | -+// o | +--------+ pad to even-align allocators stack-slot -+// w V | pad0 | numbers; owned by CALLER -+// t -----------+--------+----> Matcher::_in_arg_limit, unaligned -+// h ^ | in | 5 -+// | | args | 4 Holes in incoming args owned by SELF -+// | | | | 3 -+// | | +--------+ -+// V | | old out| Empty on Intel, window on Sparc -+// | old |preserve| Must be even aligned. -+// | SP-+--------+----> Matcher::_old_SP, even aligned -+// | | in | 3 area for Intel ret address -+// Owned by |preserve| Empty on Sparc. -+// SELF +--------+ -+// | | pad2 | 2 pad to align old SP -+// | +--------+ 1 -+// | | locks | 0 -+// | +--------+----> OptoReg::stack0(), even aligned -+// | | pad1 | 11 pad to align new SP -+// | +--------+ -+// | | | 10 -+// | | spills | 9 spills -+// V | | 8 (pad0 slot for callee) -+// -----------+--------+----> Matcher::_out_arg_limit, unaligned -+// ^ | out | 7 -+// | | args | 6 Holes in outgoing args owned by CALLEE -+// Owned by +--------+ -+// CALLEE | new out| 6 Empty on Intel, window on Sparc -+// | new |preserve| Must be even-aligned. -+// | SP-+--------+----> Matcher::_new_SP, even aligned -+// | | | -+// -+// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is -+// known from SELF's arguments and the Java calling convention. -+// Region 6-7 is determined per call site. -+// Note 2: If the calling convention leaves holes in the incoming argument -+// area, those holes are owned by SELF. Holes in the outgoing area -+// are owned by the CALLEE. 
Holes should not be nessecary in the -+// incoming area, as the Java calling convention is completely under -+// the control of the AD file. Doubles can be sorted and packed to -+// avoid holes. Holes in the outgoing arguments may be nessecary for -+// varargs C calling conventions. -+// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is -+// even aligned with pad0 as needed. -+// Region 6 is even aligned. Region 6-7 is NOT even aligned; -+// (the latter is true on Intel but is it false on RISCV?) -+// region 6-11 is even aligned; it may be padded out more so that -+// the region from SP to FP meets the minimum stack alignment. -+// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack -+// alignment. Region 11, pad1, may be dynamically extended so that -+// SP meets the minimum alignment. ++// Special Registers + -+frame %{ -+ // These three registers define part of the calling convention -+ // between compiled code and the interpreter. ++// Method Register ++operand inline_cache_RegP(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg ++ match(reg); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ // Inline Cache Register or methodOop for I2C. -+ inline_cache_reg(R31); ++//----------OPERAND CLASSES---------------------------------------------------- ++// Operand Classes are groups of operands that are used as to simplify ++// instruction definitions by not requiring the AD writer to specify ++// separate instructions for every form of operand when the ++// instruction accepts multiple operand types with the same basic ++// encoding and format. The classic case of this is memory operands. + -+ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] -+ cisc_spilling_operand_name(indOffset); ++// memory is used to define read/write location for load/store ++// instruction defs. we can turn a memory op into an Address + -+ // Number of stack slots consumed by locking an object -+ // generate Compile::sync_stack_slots -+ // VMRegImpl::slots_per_word = wordSize / stack_slot_size = 8 / 4 = 2 -+ sync_stack_slots(1 * VMRegImpl::slots_per_word); ++opclass memory(indirect, indOffI, indOffL, indirectN, indOffIN, indOffLN); + -+ // Compiled code's Frame Pointer -+ frame_pointer(R2); ++// iRegIorL2I is used for src inputs in rules for 32 bit int (I) ++// operations. it allows the src to be either an iRegI or a (ConvL2I ++// iRegL). in the latter case the l2i normally planted for a ConvL2I ++// can be elided because the 32-bit instruction will just employ the ++// lower 32 bits anyway. ++// ++// n.b. this does not elide all L2I conversions. if the truncated ++// value is consumed by more than one operation then the ConvL2I ++// cannot be bundled into the consuming nodes so an l2i gets planted ++// (actually a mvw $dst $src) and the downstream instructions consume ++// the result of the l2i as an iRegI input. That's a shame since the ++// mvw is actually redundant but its not too costly. + -+ // Interpreter stores its frame pointer in a register which is -+ // stored to the stack by I2CAdaptors. -+ // I2CAdaptors convert from interpreted java to compiled java. 
-+ interpreter_frame_pointer(R8); ++opclass iRegIorL2I(iRegI, iRegL2I); ++opclass iRegIorL(iRegI, iRegL); ++opclass iRegNorP(iRegN, iRegP); ++opclass iRegILNP(iRegI, iRegL, iRegN, iRegP); ++opclass iRegILNPNoSp(iRegINoSp, iRegLNoSp, iRegNNoSp, iRegPNoSp); ++opclass immIorL(immI, immL); + -+ // Stack alignment requirement -+ stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. + -+ // Number of outgoing stack slots killed above the out_preserve_stack_slots -+ // for calls to C. Supports the var-args backing area for register parms. -+ varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); ++// For specific pipelines, e.g. generic RISC-V, define the stages of that pipeline ++//pipe_desc(ID, EX, MEM, WR); ++#define ID S0 ++#define EX S1 ++#define MEM S2 ++#define WR S3 + -+ // The after-PROLOG location of the return address. Location of -+ // return address specifies a type (REG or STACK) and a number -+ // representing the register number (i.e. - use a register name) or -+ // stack slot. -+ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. -+ // Otherwise, it is above the locks and verification slot and alignment word -+ // TODO this may well be correct but need to check why that - 2 is there -+ // ppc port uses 0 but we definitely need to allow for fixed_slots -+ // which folds in the space used for monitors -+ return_addr(STACK - 2 + -+ align_up((Compile::current()->in_preserve_stack_slots() + -+ Compile::current()->fixed_slots()), -+ stack_alignment_in_slots())); ++// Integer ALU reg operation ++pipeline %{ + -+ // Location of compiled Java return values. Same as C for now. -+ return_value -+ %{ -+ assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, -+ "only return normal values"); ++attributes %{ ++ // RISC-V instructions are of fixed length ++ fixed_size_instructions; // Fixed size instructions TODO does ++ max_instructions_per_bundle = 2; // Generic RISC-V 1, Sifive Series 7 2 ++ // RISC-V instructions come in 32-bit word units ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 64; // The processor fetches one line ++ instruction_fetch_units = 1; // of 64 bytes + -+ static const int lo[Op_RegL + 1] = { // enum name -+ 0, // Op_Node -+ 0, // Op_Set -+ R10_num, // Op_RegN -+ R10_num, // Op_RegI -+ R10_num, // Op_RegP -+ F10_num, // Op_RegF -+ F10_num, // Op_RegD -+ R10_num // Op_RegL -+ }; ++ // List of nop instructions ++ nops( MachNop ); ++%} + -+ static const int hi[Op_RegL + 1] = { // enum name -+ 0, // Op_Node -+ 0, // Op_Set -+ OptoReg::Bad, // Op_RegN -+ OptoReg::Bad, // Op_RegI -+ R10_H_num, // Op_RegP -+ OptoReg::Bad, // Op_RegF -+ F10_H_num, // Op_RegD -+ R10_H_num // Op_RegL -+ }; ++// We don't use an actual pipeline model so don't care about resources ++// or description. 
we do use pipeline classes to introduce fixed ++// latencies + -+ return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); -+ %} -+%} ++//----------RESOURCES---------------------------------------------------------- ++// Resources are the functional units available to the machine + -+//----------ATTRIBUTES--------------------------------------------------------- -+//----------Operand Attributes------------------------------------------------- -+op_attrib op_cost(1); // Required cost attribute ++// Generic RISC-V pipeline ++// 1 decoder ++// 1 instruction decoded per cycle ++// 1 load/store ops per cycle, 1 branch, 1 FPU ++// 1 mul, 1 div + -+//----------Instruction Attributes--------------------------------------------- -+ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute -+ins_attrib ins_size(32); // Required size attribute (in bits) -+ins_attrib ins_short_branch(0); // Required flag: is this instruction -+ // a non-matching short branch variant -+ // of some long branch? -+ins_attrib ins_alignment(4); // Required alignment attribute (must -+ // be a power of 2) specifies the -+ // alignment that some part of the -+ // instruction (not necessarily the -+ // start) requires. If > 1, a -+ // compute_padding() function must be -+ // provided for the instruction ++resources ( DECODE, ++ ALU, ++ MUL, ++ DIV, ++ BRANCH, ++ LDST, ++ FPU); + -+//----------OPERANDS----------------------------------------------------------- -+// Operand definitions must precede instruction definitions for correct parsing -+// in the ADLC because operands constitute user defined types which are used in -+// instruction definitions. ++//----------PIPELINE DESCRIPTION----------------------------------------------- ++// Pipeline Description specifies the stages in the machine's pipeline + -+//----------Simple Operands---------------------------------------------------- ++// Define the pipeline as a generic 6 stage pipeline ++pipe_desc(S0, S1, S2, S3, S4, S5); + -+// Integer operands 32 bit -+// 32 bit immediate -+operand immI() -+%{ -+ match(ConI); ++//----------PIPELINE CLASSES--------------------------------------------------- ++// Pipeline Classes describe the stages in which input and output are ++// referenced by the hardware pipeline. 
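For readability, an annotated copy of the integer ALU class defined further down (purely illustrative; the name ialu_reg_reg_annotated is a placeholder and nothing references it), showing how the ID/EX/MEM/WR aliases above express when operands are read and written:

    // Annotated restatement of ialu_reg_reg (see below); ID/EX alias S0/S1.
    pipe_class ialu_reg_reg_annotated(iRegI dst, iRegI src1, iRegI src2)
    %{
      single_instruction;     // occupies a single issue slot
      dst    : EX(write);     // result becomes available after execute (S1)
      src1   : ID(read);      // both sources must be ready at decode (S0)
      src2   : ID(read);
      DECODE : ID;            // uses the decoder resource in ID
      ALU    : EX;            // and the ALU resource in EX
    %}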
+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++pipe_class fp_dop_reg_reg_s(fRegF dst, fRegF src1, fRegF src2) ++%{ ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// 32 bit zero -+operand immI0() ++pipe_class fp_dop_reg_reg_d(fRegD dst, fRegD src1, fRegD src2) +%{ -+ predicate(n->get_int() == 0); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// 32 bit unit increment -+operand immI_1() ++pipe_class fp_uop_s(fRegF dst, fRegF src) +%{ -+ predicate(n->get_int() == 1); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// 32 bit unit decrement -+operand immI_M1() ++pipe_class fp_uop_d(fRegD dst, fRegD src) +%{ -+ predicate(n->get_int() == -1); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Unsigned Integer Immediate: 6-bit int, greater than 32 -+operand uimmI6_ge32() %{ -+ predicate(((unsigned int)(n->get_int()) < 64) && (n->get_int() >= 32)); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++pipe_class fp_d2f(fRegF dst, fRegD src) ++%{ ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+operand immI_le_4() ++pipe_class fp_f2d(fRegD dst, fRegF src) +%{ -+ predicate(n->get_int() <= 4); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+operand immI_16() ++pipe_class fp_f2i(iRegINoSp dst, fRegF src) +%{ -+ predicate(n->get_int() == 16); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+operand immI_24() ++pipe_class fp_f2l(iRegLNoSp dst, fRegF src) +%{ -+ predicate(n->get_int() == 24); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+operand immI_31() ++pipe_class fp_i2f(fRegF dst, iRegIorL2I src) +%{ -+ predicate(n->get_int() == 31); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+operand immI_63() ++pipe_class fp_l2f(fRegF dst, iRegL src) +%{ -+ predicate(n->get_int() == 63); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// 32 bit integer valid for add immediate -+operand immIAdd() ++pipe_class fp_d2i(iRegINoSp dst, fRegD src) +%{ -+ predicate(Assembler::operand_valid_for_add_immediate((int64_t)n->get_int())); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// 32 bit integer valid for sub immediate -+operand immISub() ++pipe_class fp_d2l(iRegLNoSp dst, fRegD src) +%{ -+ predicate(Assembler::operand_valid_for_add_immediate(-(int64_t)n->get_int())); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); 
++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// 5 bit signed value. -+operand immI5() ++pipe_class fp_i2d(fRegD dst, iRegIorL2I src) +%{ -+ predicate(n->get_int() <= 15 && n->get_int() >= -16); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// 5 bit signed value (simm5) -+operand immL5() ++pipe_class fp_l2d(fRegD dst, iRegIorL2I src) +%{ -+ predicate(n->get_long() <= 15 && n->get_long() >= -16); -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Integer operands 64 bit -+// 64 bit immediate -+operand immL() ++pipe_class fp_div_s(fRegF dst, fRegF src1, fRegF src2) +%{ -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// 64 bit zero -+operand immL0() ++pipe_class fp_div_d(fRegD dst, fRegD src1, fRegD src2) +%{ -+ predicate(n->get_long() == 0); -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Pointer operands -+// Pointer Immediate -+operand immP() ++pipe_class fp_sqrt_s(fRegF dst, fRegF src1, fRegF src2) +%{ -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// NULL Pointer Immediate -+operand immP0() ++pipe_class fp_sqrt_d(fRegD dst, fRegD src1, fRegD src2) +%{ -+ predicate(n->get_ptr() == 0); -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Pointer Immediate One -+// this is used in object initialization (initial object header) -+operand immP_1() ++pipe_class fp_load_constant_s(fRegF dst) +%{ -+ predicate(n->get_ptr() == 1); -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Card Table Byte Map Base -+operand immByteMapBase() ++pipe_class fp_load_constant_d(fRegD dst) +%{ -+ // Get base of card map -+ predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && -+ (CardTable::CardValue*)n->get_ptr() == -+ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Int Immediate: low 16-bit mask -+operand immI_16bits() ++pipe_class fp_load_mem_s(fRegF dst, memory mem) +%{ -+ predicate(n->get_int() == 0xFFFF); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ mem : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// Long Immediate: low 32-bit mask -+operand immL_32bits() ++pipe_class fp_load_mem_d(fRegD dst, memory mem) +%{ -+ predicate(n->get_long() == 0xFFFFFFFFL); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ mem : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ LDST : 
MEM; +%} + -+// 64 bit unit decrement -+operand immL_M1() ++pipe_class fp_store_reg_s(fRegF src, memory mem) +%{ -+ predicate(n->get_long() == -1); -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ mem : S5(write); ++ DECODE : ID; ++ LDST : MEM; +%} + -+ -+// 32 bit offset of pc in thread anchor -+ -+operand immL_pc_off() ++pipe_class fp_store_reg_d(fRegD src, memory mem) +%{ -+ predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) + -+ in_bytes(JavaFrameAnchor::last_Java_pc_offset())); -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ mem : S5(write); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// 64 bit integer valid for add immediate -+operand immLAdd() ++//------- Integer ALU operations -------------------------- ++ ++// Integer ALU reg-reg operation ++// Operands needs in ID, result generated in EX ++// E.g. ADD Rd, Rs1, Rs2 ++pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ -+ predicate(Assembler::operand_valid_for_add_immediate(n->get_long())); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ ALU : EX; +%} + -+// 64 bit integer valid for sub immediate -+operand immLSub() ++// Integer ALU reg operation with constant shift ++// E.g. SLLI Rd, Rs1, #shift ++pipe_class ialu_reg_shift(iRegI dst, iRegI src1) +%{ -+ predicate(Assembler::operand_valid_for_add_immediate(-(n->get_long()))); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ DECODE : ID; ++ ALU : EX; +%} + -+// Narrow pointer operands -+// Narrow Pointer Immediate -+operand immN() ++// Integer ALU reg-reg operation with variable shift ++// both operands must be available in ID ++// E.g. SLL Rd, Rs1, Rs2 ++pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2) +%{ -+ match(ConN); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ ALU : EX; +%} + -+// Narrow NULL Pointer Immediate -+operand immN0() ++// Integer ALU reg operation ++// E.g. NEG Rd, Rs2 ++pipe_class ialu_reg(iRegI dst, iRegI src) +%{ -+ predicate(n->get_narrowcon() == 0); -+ match(ConN); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : EX(write); ++ src : ID(read); ++ DECODE : ID; ++ ALU : EX; +%} + -+operand immNKlass() ++// Integer ALU reg immediate operation ++// E.g. ADDI Rd, Rs1, #imm ++pipe_class ialu_reg_imm(iRegI dst, iRegI src1) +%{ -+ match(ConNKlass); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ DECODE : ID; ++ ALU : EX; +%} + -+// Float and Double operands -+// Double Immediate -+operand immD() ++// Integer ALU immediate operation (no source operands) ++// E.g. LI Rd, #imm ++pipe_class ialu_imm(iRegI dst) +%{ -+ match(ConD); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : EX(write); ++ DECODE : ID; ++ ALU : EX; +%} + -+// Double Immediate: +0.0d -+operand immD0() -+%{ -+ predicate(jlong_cast(n->getd()) == 0); -+ match(ConD); ++//------- Multiply pipeline operations -------------------- + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++// Multiply reg-reg ++// E.g. 
MULW Rd, Rs1, Rs2 ++pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ MUL : WR; +%} + -+// Float Immediate -+operand immF() ++// E.g. MUL RD, Rs1, Rs2 ++pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ -+ match(ConF); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ fixed_latency(3); // Maximum latency for 64 bit mul ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ MUL : WR; +%} + -+// Float Immediate: +0.0f. -+operand immF0() -+%{ -+ predicate(jint_cast(n->getf()) == 0); -+ match(ConF); ++//------- Divide pipeline operations -------------------- + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++// E.g. DIVW Rd, Rs1, Rs2 ++pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ fixed_latency(8); // Maximum latency for 32 bit divide ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ DIV : WR; +%} + -+operand immIOffset() ++// E.g. DIV RD, Rs1, Rs2 ++pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ -+ predicate(is_imm_in_range(n->get_int(), 12, 0)); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ fixed_latency(16); // Maximum latency for 64 bit divide ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ DIV : WR; +%} + -+operand immLOffset() ++//------- Load pipeline operations ------------------------ ++ ++// Load - reg, mem ++// E.g. LA Rd, mem ++pipe_class iload_reg_mem(iRegI dst, memory mem) +%{ -+ predicate(is_imm_in_range(n->get_long(), 12, 0)); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : WR(write); ++ mem : ID(read); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// Scale values -+operand immIScale() ++// Load - reg, reg ++// E.g. LD Rd, Rs ++pipe_class iload_reg_reg(iRegI dst, iRegI src) +%{ -+ predicate(1 <= n->get_int() && (n->get_int() <= 3)); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : WR(write); ++ src : ID(read); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// Integer 32 bit Register Operands -+operand iRegI() ++//------- Control transfer pipeline operations ------------ ++ ++// Store - zr, mem ++// E.g. SD zr, mem ++pipe_class istore_mem(memory mem) +%{ -+ constraint(ALLOC_IN_RC(any_reg32)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ mem : ID(read); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// Integer 32 bit Register not Special -+operand iRegINoSp() ++// Store - reg, mem ++// E.g. SD Rs, mem ++pipe_class istore_reg_mem(iRegI src, memory mem) +%{ -+ constraint(ALLOC_IN_RC(no_special_reg32)); -+ match(RegI); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ mem : ID(read); ++ src : EX(read); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// Register R10 only -+operand iRegI_R10() -+%{ -+ constraint(ALLOC_IN_RC(int_r10_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Register R12 only -+operand iRegI_R12() ++// Store - reg, reg ++// E.g. 
SD Rs2, Rs1 ++pipe_class istore_reg_reg(iRegI dst, iRegI src) +%{ -+ constraint(ALLOC_IN_RC(int_r12_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ dst : ID(read); ++ src : EX(read); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// Register R13 only -+operand iRegI_R13() -+%{ -+ constraint(ALLOC_IN_RC(int_r13_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++//------- Store pipeline operations ----------------------- + -+// Register R14 only -+operand iRegI_R14() ++// Branch ++pipe_class pipe_branch() +%{ -+ constraint(ALLOC_IN_RC(int_r14_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ DECODE : ID; ++ BRANCH : EX; +%} + -+// Integer 64 bit Register Operands -+operand iRegL() ++// Branch ++pipe_class pipe_branch_reg(iRegI src) +%{ -+ constraint(ALLOC_IN_RC(any_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ src : ID(read); ++ DECODE : ID; ++ BRANCH : EX; +%} + -+// Integer 64 bit Register not Special -+operand iRegLNoSp() ++// Compare & Branch ++// E.g. BEQ Rs1, Rs2, L ++pipe_class pipe_cmp_branch(iRegI src1, iRegI src2) +%{ -+ constraint(ALLOC_IN_RC(no_special_reg)); -+ match(RegL); -+ match(iRegL_R10); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ BRANCH : EX; +%} + -+// Long 64 bit Register R28 only -+operand iRegL_R28() ++// E.g. BEQZ Rs, L ++pipe_class pipe_cmpz_branch(iRegI src) +%{ -+ constraint(ALLOC_IN_RC(r28_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ src : ID(read); ++ DECODE : ID; ++ BRANCH : EX; +%} + -+// Long 64 bit Register R29 only -+operand iRegL_R29() ++//------- Synchronisation operations ---------------------- ++// Any operation requiring serialization ++// E.g. FENCE/Atomic Ops/Load Acquire/Store Release ++pipe_class pipe_serial() +%{ -+ constraint(ALLOC_IN_RC(r29_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ force_serialization; ++ fixed_latency(16); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// Long 64 bit Register R30 only -+operand iRegL_R30() ++pipe_class pipe_slow() +%{ -+ constraint(ALLOC_IN_RC(r30_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ instruction_count(10); ++ multiple_bundles; ++ force_serialization; ++ fixed_latency(16); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// Pointer Register Operands -+// Pointer Register -+operand iRegP() ++// Empty pipeline class ++pipe_class pipe_class_empty() +%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ match(iRegP_R10); -+ match(javaThread_RegP); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ fixed_latency(0); +%} + -+// Pointer 64 bit Register not Special -+operand iRegPNoSp() ++// Default pipeline class. ++pipe_class pipe_class_default() +%{ -+ constraint(ALLOC_IN_RC(no_special_ptr_reg)); -+ match(RegP); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ fixed_latency(2); +%} + -+operand iRegP_R10() ++// Pipeline class for compares. 
++pipe_class pipe_class_compare() +%{ -+ constraint(ALLOC_IN_RC(r10_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ fixed_latency(16); +%} + -+// Pointer 64 bit Register R11 only -+operand iRegP_R11() ++// Pipeline class for memory operations. ++pipe_class pipe_class_memory() +%{ -+ constraint(ALLOC_IN_RC(r11_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ fixed_latency(16); +%} + -+operand iRegP_R12() ++// Pipeline class for call. ++pipe_class pipe_class_call() +%{ -+ constraint(ALLOC_IN_RC(r12_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ fixed_latency(100); +%} + -+// Pointer 64 bit Register R13 only -+operand iRegP_R13() -+%{ -+ constraint(ALLOC_IN_RC(r13_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++// Define the class for the Nop node. ++define %{ ++ MachNop = pipe_class_empty; +%} -+ -+operand iRegP_R14() -+%{ -+ constraint(ALLOC_IN_RC(r14_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); +%} ++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// rrspectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. 
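Putting those pieces together, a minimal illustrative rule in the same style as the load rules that follow (not part of the patch; the name addI_sketch, the ALU_COST constant and the addw call are assumed here by analogy with LOAD_COST and the other C2_MacroAssembler calls in this file):

    // Illustrative only: integer add rule showing match/ins_cost/format/
    // ins_encode/ins_pipe working together.
    instruct addI_sketch(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2)
    %{
      match(Set dst (AddI src1 src2));   // ideal subtree this rule replaces
      ins_cost(ALU_COST);                // assumed cost constant, cf. LOAD_COST
      format %{ "addw  $dst, $src1, $src2\t# int, illustrative" %}
      ins_encode %{
        __ addw(as_Register($dst$$reg),
                as_Register($src1$$reg),
                as_Register($src2$$reg));
      %}
      ins_pipe(ialu_reg_reg);            // pipeline class defined above
    %}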
+ -+operand iRegP_R15() -+%{ -+ constraint(ALLOC_IN_RC(r15_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// ============================================================================ ++// Memory (Load/Store) Instructions + -+operand iRegP_R16() -+%{ -+ constraint(ALLOC_IN_RC(r16_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// Load Instructions + -+// Pointer 64 bit Register R28 only -+operand iRegP_R28() ++// Load Byte (8 bit signed) ++instruct loadB(iRegINoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(r28_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ match(Set dst (LoadB mem)); + -+// Pointer Register Operands -+// Narrow Pointer Register -+operand iRegN() -+%{ -+ constraint(ALLOC_IN_RC(any_reg32)); -+ match(RegN); -+ match(iRegNNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ ins_cost(LOAD_COST); ++ format %{ "lb $dst, $mem\t# byte, #@loadB" %} + -+// Integer 64 bit Register not Special -+operand iRegNNoSp() -+%{ -+ constraint(ALLOC_IN_RC(no_special_reg32)); -+ match(RegN); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ ins_encode %{ ++ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+// heap base register -- used for encoding immN0 -+operand iRegIHeapbase() -+%{ -+ constraint(ALLOC_IN_RC(heapbase_reg)); -+ match(RegI); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ ins_pipe(iload_reg_mem); +%} + -+// Long 64 bit Register R10 only -+operand iRegL_R10() ++// Load Byte (8 bit signed) into long ++instruct loadB2L(iRegLNoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(r10_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ match(Set dst (ConvI2L (LoadB mem))); + -+// Float Register -+// Float register operands -+operand fRegF() -+%{ -+ constraint(ALLOC_IN_RC(float_reg)); -+ match(RegF); ++ ins_cost(LOAD_COST); ++ format %{ "lb $dst, $mem\t# byte, #@loadB2L" %} + -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ ins_encode %{ ++ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+// Double Register -+// Double register operands -+operand fRegD() ++// Load Byte (8 bit unsigned) ++instruct loadUB(iRegINoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(double_reg)); -+ match(RegD); ++ match(Set dst (LoadUB mem)); + -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ ins_cost(LOAD_COST); ++ format %{ "lbu $dst, $mem\t# byte, #@loadUB" %} + -+// Generic vector class. This will be used for -+// all vector operands. 
-+operand vReg() -+%{ -+ constraint(ALLOC_IN_RC(vectora_reg)); -+ match(VecA); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ ins_encode %{ ++ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+operand vReg_V1() -+%{ -+ constraint(ALLOC_IN_RC(v1_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ ins_pipe(iload_reg_mem); +%} + -+operand vReg_V2() ++// Load Byte (8 bit unsigned) into long ++instruct loadUB2L(iRegLNoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(v2_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ match(Set dst (ConvI2L (LoadUB mem))); + -+operand vReg_V3() -+%{ -+ constraint(ALLOC_IN_RC(v3_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ ins_cost(LOAD_COST); ++ format %{ "lbu $dst, $mem\t# byte, #@loadUB2L" %} + -+operand vReg_V4() -+%{ -+ constraint(ALLOC_IN_RC(v4_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ ins_encode %{ ++ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+operand vReg_V5() -+%{ -+ constraint(ALLOC_IN_RC(v5_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ ins_pipe(iload_reg_mem); +%} + -+// Java Thread Register -+operand javaThread_RegP(iRegP reg) ++// Load Short (16 bit signed) ++instruct loadS(iRegINoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(java_thread_reg)); // java_thread_reg -+ match(reg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lh $dst, $mem\t# short, #@loadS" %} ++ ++ ins_encode %{ ++ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+//----------Memory Operands---------------------------------------------------- -+// RISCV has only base_plus_offset and literal address mode, so no need to use -+// index and scale. Here set index as 0xffffffff and scale as 0x0. 
-+operand indirect(iRegP reg) ++// Load Short (16 bit signed) into long ++instruct loadS2L(iRegLNoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(reg); -+ op_cost(0); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp(0x0); ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lh $dst, $mem\t# short, #@loadS2L" %} ++ ++ ins_encode %{ ++ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+operand indOffI(iRegP reg, immIOffset off) ++// Load Char (16 bit unsigned) ++instruct loadUS(iRegINoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lhu $dst, $mem\t# short, #@loadUS" %} ++ ++ ins_encode %{ ++ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+operand indOffL(iRegP reg, immLOffset off) ++// Load Short/Char (16 bit unsigned) into long ++instruct loadUS2L(iRegLNoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lhu $dst, $mem\t# short, #@loadUS2L" %} ++ ++ ins_encode %{ ++ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+operand indirectN(iRegN reg) ++// Load Integer (32 bit signed) ++instruct loadI(iRegINoSp dst, memory mem) +%{ -+ predicate(CompressedOops::shift() == 0); -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(DecodeN reg); -+ op_cost(0); -+ format %{ "[$reg]\t# narrow" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp(0x0); ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lw $dst, $mem\t# int, #@loadI" %} ++ ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+operand indOffIN(iRegN reg, immIOffset off) ++// Load Integer (32 bit signed) into long ++instruct loadI2L(iRegLNoSp dst, memory mem) +%{ -+ predicate(CompressedOops::shift() == 0); -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP (DecodeN reg) off); -+ op_cost(0); -+ format %{ "[$reg, $off]\t# narrow" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lw $dst, $mem\t# int, #@loadI2L" %} ++ ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+operand indOffLN(iRegN reg, immLOffset off) ++// Load Integer (32 bit unsigned) into long ++instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) +%{ -+ predicate(CompressedOops::shift() == 0); -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP (DecodeN reg) off); -+ op_cost(0); -+ format %{ "[$reg, $off]\t# narrow" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); 
-+ disp($off); ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# int, #@loadUI2L" %} ++ ++ ins_encode %{ ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+// RISCV opto stubs need to write to the pc slot in the thread anchor -+operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off) ++// Load Long (64 bit signed) ++instruct loadL(iRegLNoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} ++ match(Set dst (LoadL mem)); + ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# int, #@loadL" %} + -+//----------Special Memory Operands-------------------------------------------- -+// Stack Slot Operand - This operand is used for loading and storing temporary -+// values on the stack where a match requires a value to -+// flow through memory. -+operand stackSlotI(sRegI reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegI); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} -+%} + -+operand stackSlotF(sRegF reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegF); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} ++ ins_pipe(iload_reg_mem); +%} + -+operand stackSlotD(sRegD reg) ++// Load Range ++instruct loadRange(iRegINoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegD); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} ++ match(Set dst (LoadRange mem)); + -+operand stackSlotL(sRegL reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegL); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# range, #@loadRange" %} + -+// Special operand allowing long args to int ops to be truncated for free ++ ins_encode %{ ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+operand iRegL2I(iRegL reg) %{ ++ ins_pipe(iload_reg_mem); ++%} + -+ op_cost(0); ++// Load Pointer ++instruct loadP(iRegPNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadP mem)); + -+ match(ConvL2I reg); ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# ptr, #@loadP" %} + -+ format %{ "l2i($reg)" %} ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+ interface(REG_INTER) ++ ins_pipe(iload_reg_mem); +%} + ++// Load Compressed Pointer 
++instruct loadN(iRegNNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); + -+// Comparison Operands -+// NOTE: Label is a predefined operand which should not be redefined in -+// the AD file. It is generically handled within the ADLC. ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# loadN, compressed ptr, #@loadN" %} + -+//----------Conditional Branch Operands---------------------------------------- -+// Comparison Op - This is the operation of the comparison, and is limited to -+// the following set of codes: -+// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) -+// -+// Other attributes of the comparison, such as unsignedness, are specified -+// by the comparison instruction that sets a condition code flags register. -+// That result is represented by a flags operand whose subtype is appropriate -+// to the unsignedness (etc.) of the comparison. -+// -+// Later, the instruction which matches both the Comparison Op (a Bool) and -+// the flags (produced by the Cmp) specifies the coding of the comparison op -+// by matching a specific subtype of Bool operand below, such as cmpOpU. ++ ins_encode %{ ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} + ++ ins_pipe(iload_reg_mem); ++%} + -+// used for signed integral comparisons and fp comparisons -+operand cmpOp() ++// Load Klass Pointer ++instruct loadKlass(iRegPNoSp dst, memory mem) +%{ -+ match(Bool); ++ match(Set dst (LoadKlass mem)); + -+ format %{ "" %} ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# class, #@loadKlass" %} + -+ // the values in interface derives from struct BoolTest::mask -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+// used for unsigned integral comparisons -+operand cmpOpU() ++// Load Narrow Klass Pointer ++instruct loadNKlass(iRegNNoSp dst, memory mem) +%{ -+ match(Bool); ++ match(Set dst (LoadNKlass mem)); + -+ format %{ "" %} -+ // the values in interface derives from struct BoolTest::mask -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gtu"); -+ overflow(0x2, "overflow"); -+ less(0x3, "ltu"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "leu"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "geu"); ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# loadNKlass, compressed class ptr, #@loadNKlass" %} ++ ++ ins_encode %{ ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+// used for certain integral comparisons which can be -+// converted to bxx instructions -+operand cmpOpEqNe() ++// Load Float ++instruct loadF(fRegF dst, memory mem) +%{ -+ match(Bool); -+ op_cost(0); -+ predicate(n->as_Bool()->_test._test == BoolTest::ne || -+ n->as_Bool()->_test._test == BoolTest::eq); ++ match(Set dst (LoadF mem)); + -+ format %{ "" %} -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); ++ ins_cost(LOAD_COST); ++ format %{ "flw $dst, $mem\t# float, #@loadF" %} ++ ++ ins_encode %{ ++ __ 
flw(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(fp_load_mem_s); +%} + -+operand cmpOpULtGe() ++// Load Double ++instruct loadD(fRegD dst, memory mem) +%{ -+ match(Bool); -+ op_cost(0); -+ predicate(n->as_Bool()->_test._test == BoolTest::lt || -+ n->as_Bool()->_test._test == BoolTest::ge); ++ match(Set dst (LoadD mem)); + -+ format %{ "" %} -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); ++ ins_cost(LOAD_COST); ++ format %{ "fld $dst, $mem\t# double, #@loadD" %} ++ ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(fp_load_mem_d); +%} + -+operand cmpOpUEqNeLeGt() ++// Load Int Constant ++instruct loadConI(iRegINoSp dst, immI src) +%{ -+ match(Bool); -+ op_cost(0); -+ predicate(n->as_Bool()->_test._test == BoolTest::ne || -+ n->as_Bool()->_test._test == BoolTest::eq || -+ n->as_Bool()->_test._test == BoolTest::le || -+ n->as_Bool()->_test._test == BoolTest::gt); -+ -+ format %{ "" %} -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); -+ %} -+%} ++ match(Set dst src); + ++ ins_cost(ALU_COST); ++ format %{ "li $dst, $src\t# int, #@loadConI" %} + -+// Flags register, used as output of compare logic -+operand rFlagsReg() -+%{ -+ constraint(ALLOC_IN_RC(reg_flags)); -+ match(RegFlags); ++ ins_encode(riscv_enc_li_imm(dst, src)); + -+ op_cost(0); -+ format %{ "RFLAGS" %} -+ interface(REG_INTER); ++ ins_pipe(ialu_imm); +%} + -+// Special Registers -+ -+// Method Register -+operand inline_cache_RegP(iRegP reg) ++// Load Long Constant ++instruct loadConL(iRegLNoSp dst, immL src) +%{ -+ constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg -+ match(reg); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ match(Set dst src); + -+//----------OPERAND CLASSES---------------------------------------------------- -+// Operand Classes are groups of operands that are used as to simplify -+// instruction definitions by not requiring the AD writer to specify -+// separate instructions for every form of operand when the -+// instruction accepts multiple operand types with the same basic -+// encoding and format. The classic case of this is memory operands. ++ ins_cost(ALU_COST); ++ format %{ "li $dst, $src\t# long, #@loadConL" %} + -+// memory is used to define read/write location for load/store -+// instruction defs. we can turn a memory op into an Address ++ ins_encode(riscv_enc_li_imm(dst, src)); + -+opclass memory(indirect, indOffI, indOffL, indirectN, indOffIN, indOffLN); ++ ins_pipe(ialu_imm); ++%} + -+// iRegIorL2I is used for src inputs in rules for 32 bit int (I) -+// operations. it allows the src to be either an iRegI or a (ConvL2I -+// iRegL). in the latter case the l2i normally planted for a ConvL2I -+// can be elided because the 32-bit instruction will just employ the -+// lower 32 bits anyway. -+// -+// n.b. this does not elide all L2I conversions. 
if the truncated -+// value is consumed by more than one operation then the ConvL2I -+// cannot be bundled into the consuming nodes so an l2i gets planted -+// (actually a mvw $dst $src) and the downstream instructions consume -+// the result of the l2i as an iRegI input. That's a shame since the -+// mvw is actually redundant but its not too costly. ++// Load Pointer Constant ++instruct loadConP(iRegPNoSp dst, immP con) ++%{ ++ match(Set dst con); + -+opclass iRegIorL2I(iRegI, iRegL2I); -+opclass iRegIorL(iRegI, iRegL); -+opclass iRegNorP(iRegN, iRegP); -+opclass iRegILNP(iRegI, iRegL, iRegN, iRegP); -+opclass iRegILNPNoSp(iRegINoSp, iRegLNoSp, iRegNNoSp, iRegPNoSp); -+opclass immIorL(immI, immL); ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# ptr, #@loadConP" %} + -+//----------PIPELINE----------------------------------------------------------- -+// Rules which define the behavior of the target architectures pipeline. ++ ins_encode(riscv_enc_mov_p(dst, con)); + -+// For specific pipelines, e.g. generic RISC-V, define the stages of that pipeline -+//pipe_desc(ID, EX, MEM, WR); -+#define ID S0 -+#define EX S1 -+#define MEM S2 -+#define WR S3 ++ ins_pipe(ialu_imm); ++%} + -+// Integer ALU reg operation -+pipeline %{ ++// Load Null Pointer Constant ++instruct loadConP0(iRegPNoSp dst, immP0 con) ++%{ ++ match(Set dst con); + -+attributes %{ -+ // RISC-V instructions are of fixed length -+ fixed_size_instructions; // Fixed size instructions TODO does -+ max_instructions_per_bundle = 2; // Generic RISC-V 1, Sifive Series 7 2 -+ // RISC-V instructions come in 32-bit word units -+ instruction_unit_size = 4; // An instruction is 4 bytes long -+ instruction_fetch_unit_size = 64; // The processor fetches one line -+ instruction_fetch_units = 1; // of 64 bytes ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# NULL ptr, #@loadConP0" %} + -+ // List of nop instructions -+ nops( MachNop ); ++ ins_encode(riscv_enc_mov_zero(dst)); ++ ++ ins_pipe(ialu_imm); +%} + -+// We don't use an actual pipeline model so don't care about resources -+// or description. we do use pipeline classes to introduce fixed -+// latencies ++// Load Pointer Constant One ++instruct loadConP1(iRegPNoSp dst, immP_1 con) ++%{ ++ match(Set dst con); + -+//----------RESOURCES---------------------------------------------------------- -+// Resources are the functional units available to the machine ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# load ptr constant one, #@loadConP1" %} + -+// Generic RISC-V pipeline -+// 1 decoder -+// 1 instruction decoded per cycle -+// 1 load/store ops per cycle, 1 branch, 1 FPU -+// 1 mul, 1 div ++ ins_encode(riscv_enc_mov_p1(dst)); + -+resources ( DECODE, -+ ALU, -+ MUL, -+ DIV, -+ BRANCH, -+ LDST, -+ FPU); ++ ins_pipe(ialu_imm); ++%} + -+//----------PIPELINE DESCRIPTION----------------------------------------------- -+// Pipeline Description specifies the stages in the machine's pipeline ++// Load Poll Page Constant ++instruct loadConPollPage(iRegPNoSp dst, immPollPage con) ++%{ ++ match(Set dst con); + -+// Define the pipeline as a generic 6 stage pipeline -+pipe_desc(S0, S1, S2, S3, S4, S5); ++ ins_cost(ALU_COST * 6); ++ format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} + -+//----------PIPELINE CLASSES--------------------------------------------------- -+// Pipeline Classes describe the stages in which input and output are -+// referenced by the hardware pipeline. 
++ ins_encode(riscv_enc_mov_poll_page(dst, con)); + -+pipe_class fp_dop_reg_reg_s(fRegF dst, fRegF src1, fRegF src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ ins_pipe(ialu_imm); +%} + -+pipe_class fp_dop_reg_reg_d(fRegD dst, fRegD src1, fRegD src2) ++// Load Byte Map Base Constant ++instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) +%{ -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ match(Set dst con); ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# Byte Map Base, #@loadByteMapBase" %} + -+pipe_class fp_uop_s(fRegF dst, fRegF src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ ins_encode(riscv_enc_mov_byte_map_base(dst)); + -+pipe_class fp_uop_d(fRegD dst, fRegD src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ ins_pipe(ialu_imm); +%} + -+pipe_class fp_d2f(fRegF dst, fRegD src) ++// Load Narrow Pointer Constant ++instruct loadConN(iRegNNoSp dst, immN con) +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ match(Set dst con); + -+pipe_class fp_f2d(fRegD dst, fRegF src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ ins_cost(ALU_COST * 4); ++ format %{ "mv $dst, $con\t# compressed ptr, #@loadConN" %} + -+pipe_class fp_f2i(iRegINoSp dst, fRegF src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ ins_encode(riscv_enc_mov_n(dst, con)); + -+pipe_class fp_f2l(iRegLNoSp dst, fRegF src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ ins_pipe(ialu_imm); +%} + -+pipe_class fp_i2f(fRegF dst, iRegIorL2I src) ++// Load Narrow Null Pointer Constant ++instruct loadConN0(iRegNNoSp dst, immN0 con) +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ match(Set dst con); ++ ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# compressed NULL ptr, #@loadConN0" %} ++ ++ ins_encode(riscv_enc_mov_zero(dst)); ++ ++ ins_pipe(ialu_imm); +%} + -+pipe_class fp_l2f(fRegF dst, iRegL src) ++// Load Narrow Klass Constant ++instruct loadConNKlass(iRegNNoSp dst, immNKlass con) +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ match(Set dst con); ++ ++ ins_cost(ALU_COST * 6); ++ format %{ "mv $dst, $con\t# compressed klass ptr, #@loadConNKlass" %} ++ ++ ins_encode(riscv_enc_mov_nk(dst, con)); ++ ++ ins_pipe(ialu_imm); +%} + -+pipe_class fp_d2i(iRegINoSp dst, fRegD src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} -+ -+pipe_class fp_d2l(iRegLNoSp dst, fRegD src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++// Load Float Constant ++instruct loadConF(fRegF dst, immF con) %{ ++ match(Set dst con); + -+pipe_class fp_i2d(fRegD dst, iRegIorL2I src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ ins_cost(LOAD_COST); ++ format %{ ++ "flw $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConF" ++ %} + -+pipe_class fp_l2d(fRegD dst, iRegIorL2I src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ ins_encode %{ ++ __ 
flw(as_FloatRegister($dst$$reg), $constantaddress($con)); ++ %} + -+pipe_class fp_div_s(fRegF dst, fRegF src1, fRegF src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ ins_pipe(fp_load_constant_s); +%} + -+pipe_class fp_div_d(fRegD dst, fRegD src1, fRegD src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++instruct loadConF0(fRegF dst, immF0 con) %{ ++ match(Set dst con); + -+pipe_class fp_sqrt_s(fRegF dst, fRegF src1, fRegF src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ ins_cost(XFER_COST); + -+pipe_class fp_sqrt_d(fRegD dst, fRegD src1, fRegD src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ format %{ "fmv.w.x $dst, zr\t# float, #@loadConF0" %} + -+pipe_class fp_load_constant_s(fRegF dst) -+%{ -+ single_instruction; -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ ins_encode %{ ++ __ fmv_w_x(as_FloatRegister($dst$$reg), zr); ++ %} + -+pipe_class fp_load_constant_d(fRegD dst) -+%{ -+ single_instruction; -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ ins_pipe(fp_load_constant_s); +%} + -+pipe_class fp_load_mem_s(fRegF dst, memory mem) -+%{ -+ single_instruction; -+ mem : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ LDST : MEM; -+%} ++// Load Double Constant ++instruct loadConD(fRegD dst, immD con) %{ ++ match(Set dst con); + -+pipe_class fp_load_mem_d(fRegD dst, memory mem) -+%{ -+ single_instruction; -+ mem : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ LDST : MEM; -+%} ++ ins_cost(LOAD_COST); ++ format %{ ++ "fld $dst, [$constantaddress]\t# load from constant table: double=$con, #@loadConD" ++ %} + -+pipe_class fp_store_reg_s(fRegF src, memory mem) -+%{ -+ single_instruction; -+ src : S1(read); -+ mem : S5(write); -+ DECODE : ID; -+ LDST : MEM; -+%} ++ ins_encode %{ ++ __ fld(as_FloatRegister($dst$$reg), $constantaddress($con)); ++ %} + -+pipe_class fp_store_reg_d(fRegD src, memory mem) -+%{ -+ single_instruction; -+ src : S1(read); -+ mem : S5(write); -+ DECODE : ID; -+ LDST : MEM; ++ ins_pipe(fp_load_constant_d); +%} + -+//------- Integer ALU operations -------------------------- -+ -+// Integer ALU reg-reg operation -+// Operands needs in ID, result generated in EX -+// E.g. ADD Rd, Rs1, Rs2 -+pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ ALU : EX; -+%} ++instruct loadConD0(fRegD dst, immD0 con) %{ ++ match(Set dst con); + -+// Integer ALU reg operation with constant shift -+// E.g. SLLI Rd, Rs1, #shift -+pipe_class ialu_reg_shift(iRegI dst, iRegI src1) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ DECODE : ID; -+ ALU : EX; -+%} ++ ins_cost(XFER_COST); + -+// Integer ALU reg-reg operation with variable shift -+// both operands must be available in ID -+// E.g. SLL Rd, Rs1, Rs2 -+pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ ALU : EX; -+%} ++ format %{ "fmv.d.x $dst, zr\t# double, #@loadConD0" %} + -+// Integer ALU reg operation -+// E.g. 
NEG Rd, Rs2 -+pipe_class ialu_reg(iRegI dst, iRegI src) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src : ID(read); -+ DECODE : ID; -+ ALU : EX; -+%} ++ ins_encode %{ ++ __ fmv_d_x(as_FloatRegister($dst$$reg), zr); ++ %} + -+// Integer ALU reg immediate operation -+// E.g. ADDI Rd, Rs1, #imm -+pipe_class ialu_reg_imm(iRegI dst, iRegI src1) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ DECODE : ID; -+ ALU : EX; ++ ins_pipe(fp_load_constant_d); +%} + -+// Integer ALU immediate operation (no source operands) -+// E.g. LI Rd, #imm -+pipe_class ialu_imm(iRegI dst) ++// Store Instructions ++// Store CMS card-mark Immediate ++instruct storeimmCM0(immI0 zero, memory mem) +%{ -+ single_instruction; -+ dst : EX(write); -+ DECODE : ID; -+ ALU : EX; -+%} ++ match(Set mem (StoreCM mem zero)); ++ predicate(unnecessary_storestore(n)); + -+//------- Multiply pipeline operations -------------------- ++ ins_cost(STORE_COST); ++ format %{ "storestore (elided)\n\t" ++ "sb zr, $mem\t# byte, #@storeimmCM0" %} + -+// Multiply reg-reg -+// E.g. MULW Rd, Rs1, Rs2 -+pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ MUL : WR; -+%} ++ ins_encode %{ ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+// E.g. MUL RD, Rs1, Rs2 -+pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ fixed_latency(3); // Maximum latency for 64 bit mul -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ MUL : WR; ++ ins_pipe(istore_mem); +%} + -+//------- Divide pipeline operations -------------------- -+ -+// E.g. DIVW Rd, Rs1, Rs2 -+pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++// Store CMS card-mark Immediate with intervening StoreStore ++// needed when using CMS with no conditional card marking ++instruct storeimmCM0_ordered(immI0 zero, memory mem) +%{ -+ single_instruction; -+ fixed_latency(8); // Maximum latency for 32 bit divide -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ DIV : WR; -+%} ++ match(Set mem (StoreCM mem zero)); + -+// E.g. DIV RD, Rs1, Rs2 -+pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ fixed_latency(16); // Maximum latency for 64 bit divide -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ DIV : WR; -+%} ++ ins_cost(ALU_COST + STORE_COST); ++ format %{ "membar(StoreStore)\n\t" ++ "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %} + -+//------- Load pipeline operations ------------------------ ++ ins_encode %{ ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+// Load - reg, mem -+// E.g. LA Rd, mem -+pipe_class iload_reg_mem(iRegI dst, memory mem) -+%{ -+ single_instruction; -+ dst : WR(write); -+ mem : ID(read); -+ DECODE : ID; -+ LDST : MEM; ++ ins_pipe(istore_mem); +%} + -+// Load - reg, reg -+// E.g. LD Rd, Rs -+pipe_class iload_reg_reg(iRegI dst, iRegI src) ++// Store Byte ++instruct storeB(iRegIorL2I src, memory mem) +%{ -+ single_instruction; -+ dst : WR(write); -+ src : ID(read); -+ DECODE : ID; -+ LDST : MEM; -+%} ++ match(Set mem (StoreB mem src)); + -+//------- Store pipeline operations ----------------------- ++ ins_cost(STORE_COST); ++ format %{ "sb $src, $mem\t# byte, #@storeB" %} + -+// Store - zr, mem -+// E.g. 
SD zr, mem -+pipe_class istore_mem(memory mem) -+%{ -+ single_instruction; -+ mem : ID(read); -+ DECODE : ID; -+ LDST : MEM; -+%} ++ ins_encode %{ ++ __ sb(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+// Store - reg, mem -+// E.g. SD Rs, mem -+pipe_class istore_reg_mem(iRegI src, memory mem) -+%{ -+ single_instruction; -+ mem : ID(read); -+ src : EX(read); -+ DECODE : ID; -+ LDST : MEM; ++ ins_pipe(istore_reg_mem); +%} + -+// Store - reg, reg -+// E.g. SD Rs2, Rs1 -+pipe_class istore_reg_reg(iRegI dst, iRegI src) ++instruct storeimmB0(immI0 zero, memory mem) +%{ -+ single_instruction; -+ dst : ID(read); -+ src : EX(read); -+ DECODE : ID; -+ LDST : MEM; -+%} ++ match(Set mem (StoreB mem zero)); + -+//------- Store pipeline operations ----------------------- ++ ins_cost(STORE_COST); ++ format %{ "sb zr, $mem\t# byte, #@storeimmB0" %} + -+// Branch -+pipe_class pipe_branch() -+%{ -+ single_instruction; -+ DECODE : ID; -+ BRANCH : EX; -+%} ++ ins_encode %{ ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+// Branch -+pipe_class pipe_branch_reg(iRegI src) -+%{ -+ single_instruction; -+ src : ID(read); -+ DECODE : ID; -+ BRANCH : EX; ++ ins_pipe(istore_mem); +%} + -+// Compare & Branch -+// E.g. BEQ Rs1, Rs2, L -+pipe_class pipe_cmp_branch(iRegI src1, iRegI src2) ++// Store Char/Short ++instruct storeC(iRegIorL2I src, memory mem) +%{ -+ single_instruction; -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ BRANCH : EX; -+%} ++ match(Set mem (StoreC mem src)); + -+// E.g. BEQZ Rs, L -+pipe_class pipe_cmpz_branch(iRegI src) -+%{ -+ single_instruction; -+ src : ID(read); -+ DECODE : ID; -+ BRANCH : EX; -+%} ++ ins_cost(STORE_COST); ++ format %{ "sh $src, $mem\t# short, #@storeC" %} + -+//------- Synchronisation operations ---------------------- -+// Any operation requiring serialization -+// E.g. FENCE/Atomic Ops/Load Acquire/Store Release -+pipe_class pipe_serial() -+%{ -+ single_instruction; -+ force_serialization; -+ fixed_latency(16); -+ DECODE : ID; -+ LDST : MEM; -+%} ++ ins_encode %{ ++ __ sh(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+pipe_class pipe_slow() -+%{ -+ instruction_count(10); -+ multiple_bundles; -+ force_serialization; -+ fixed_latency(16); -+ DECODE : ID; -+ LDST : MEM; ++ ins_pipe(istore_reg_mem); +%} + -+// Empty pipeline class -+pipe_class pipe_class_empty() ++instruct storeimmC0(immI0 zero, memory mem) +%{ -+ single_instruction; -+ fixed_latency(0); -+%} ++ match(Set mem (StoreC mem zero)); + -+// Default pipeline class. -+pipe_class pipe_class_default() -+%{ -+ single_instruction; -+ fixed_latency(2); -+%} ++ ins_cost(STORE_COST); ++ format %{ "sh zr, $mem\t# short, #@storeimmC0" %} + -+// Pipeline class for compares. -+pipe_class pipe_class_compare() -+%{ -+ single_instruction; -+ fixed_latency(16); -+%} ++ ins_encode %{ ++ __ sh(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+// Pipeline class for memory operations. -+pipe_class pipe_class_memory() -+%{ -+ single_instruction; -+ fixed_latency(16); ++ ins_pipe(istore_mem); +%} + -+// Pipeline class for call. -+pipe_class pipe_class_call() ++// Store Integer ++instruct storeI(iRegIorL2I src, memory mem) +%{ -+ single_instruction; -+ fixed_latency(100); -+%} ++ match(Set mem(StoreI mem src)); + -+// Define the class for the Nop node. 
-+define %{ -+ MachNop = pipe_class_empty; -+%} -+%} -+//----------INSTRUCTIONS------------------------------------------------------- -+// -+// match -- States which machine-independent subtree may be replaced -+// by this instruction. -+// ins_cost -- The estimated cost of this instruction is used by instruction -+// selection to identify a minimum cost tree of machine -+// instructions that matches a tree of machine-independent -+// instructions. -+// format -- A string providing the disassembly for this instruction. -+// The value of an instruction's operand may be inserted -+// by referring to it with a '$' prefix. -+// opcode -- Three instruction opcodes may be provided. These are referred -+// to within an encode class as $primary, $secondary, and $tertiary -+// rrspectively. The primary opcode is commonly used to -+// indicate the type of machine instruction, while secondary -+// and tertiary are often used for prefix options or addressing -+// modes. -+// ins_encode -- A list of encode classes with parameters. The encode class -+// name must have been defined in an 'enc_class' specification -+// in the encode section of the architecture description. ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# int, #@storeI" %} + -+// ============================================================================ -+// Memory (Load/Store) Instructions ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+// Load Instructions ++ ins_pipe(istore_reg_mem); ++%} + -+// Load Byte (8 bit signed) -+instruct loadB(iRegINoSp dst, memory mem) ++instruct storeimmI0(immI0 zero, memory mem) +%{ -+ match(Set dst (LoadB mem)); ++ match(Set mem(StoreI mem zero)); + -+ ins_cost(LOAD_COST); -+ format %{ "lb $dst, $mem\t# byte, #@loadB" %} ++ ins_cost(STORE_COST); ++ format %{ "sw zr, $mem\t# int, #@storeimmI0" %} + + ins_encode %{ -+ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ sw(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(istore_mem); +%} + -+// Load Byte (8 bit signed) into long -+instruct loadB2L(iRegLNoSp dst, memory mem) ++// Store Long (64 bit signed) ++instruct storeL(iRegL src, memory mem) +%{ -+ match(Set dst (ConvI2L (LoadB mem))); ++ match(Set mem (StoreL mem src)); + -+ ins_cost(LOAD_COST); -+ format %{ "lb $dst, $mem\t# byte, #@loadB2L" %} ++ ins_cost(STORE_COST); ++ format %{ "sd $src, $mem\t# long, #@storeL" %} + + ins_encode %{ -+ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(istore_reg_mem); +%} + -+// Load Byte (8 bit unsigned) -+instruct loadUB(iRegINoSp dst, memory mem) ++// Store Long (64 bit signed) ++instruct storeimmL0(immL0 zero, memory mem) +%{ -+ match(Set dst (LoadUB mem)); ++ match(Set mem (StoreL mem zero)); + -+ ins_cost(LOAD_COST); -+ format %{ "lbu $dst, $mem\t# byte, #@loadUB" %} ++ ins_cost(STORE_COST); ++ format %{ "sd zr, $mem\t# long, #@storeimmL0" %} + + ins_encode %{ -+ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(istore_mem); +%} + -+// Load Byte (8 bit unsigned) into long -+instruct loadUB2L(iRegLNoSp dst, memory mem) ++// Store Pointer 
++instruct storeP(iRegP src, memory mem) +%{ -+ match(Set dst (ConvI2L (LoadUB mem))); ++ match(Set mem (StoreP mem src)); + -+ ins_cost(LOAD_COST); -+ format %{ "lbu $dst, $mem\t# byte, #@loadUB2L" %} ++ ins_cost(STORE_COST); ++ format %{ "sd $src, $mem\t# ptr, #@storeP" %} + + ins_encode %{ -+ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(istore_reg_mem); +%} + -+// Load Short (16 bit signed) -+instruct loadS(iRegINoSp dst, memory mem) ++// Store Pointer ++instruct storeimmP0(immP0 zero, memory mem) +%{ -+ match(Set dst (LoadS mem)); ++ match(Set mem (StoreP mem zero)); + -+ ins_cost(LOAD_COST); -+ format %{ "lh $dst, $mem\t# short, #@loadS" %} ++ ins_cost(STORE_COST); ++ format %{ "sd zr, $mem\t# ptr, #@storeimmP0" %} + + ins_encode %{ -+ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(istore_mem); +%} + -+// Load Short (16 bit signed) into long -+instruct loadS2L(iRegLNoSp dst, memory mem) ++// Store Compressed Pointer ++instruct storeN(iRegN src, memory mem) +%{ -+ match(Set dst (ConvI2L (LoadS mem))); ++ match(Set mem (StoreN mem src)); + -+ ins_cost(LOAD_COST); -+ format %{ "lh $dst, $mem\t# short, #@loadS2L" %} ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %} + + ins_encode %{ -+ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(istore_reg_mem); +%} + -+// Load Char (16 bit unsigned) -+instruct loadUS(iRegINoSp dst, memory mem) ++instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) +%{ -+ match(Set dst (LoadUS mem)); ++ match(Set mem (StoreN mem zero)); ++ predicate(Universe::narrow_oop_base() == NULL && ++ Universe::narrow_klass_base() == NULL); + -+ ins_cost(LOAD_COST); -+ format %{ "lhu $dst, $mem\t# short, #@loadUS" %} ++ ins_cost(STORE_COST); ++ format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} + + ins_encode %{ -+ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ sw(as_Register($heapbase$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(istore_reg_mem); +%} + -+// Load Short/Char (16 bit unsigned) into long -+instruct loadUS2L(iRegLNoSp dst, memory mem) ++// Store Float ++instruct storeF(fRegF src, memory mem) +%{ -+ match(Set dst (ConvI2L (LoadUS mem))); ++ match(Set mem (StoreF mem src)); + -+ ins_cost(LOAD_COST); -+ format %{ "lhu $dst, $mem\t# short, #@loadUS2L" %} ++ ins_cost(STORE_COST); ++ format %{ "fsw $src, $mem\t# float, #@storeF" %} + + ins_encode %{ -+ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ fsw(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(fp_store_reg_s); +%} + -+// Load Integer (32 bit signed) -+instruct loadI(iRegINoSp dst, memory mem) ++// Store Double ++instruct storeD(fRegD src, memory mem) +%{ -+ match(Set dst (LoadI mem)); ++ match(Set mem (StoreD mem src)); + -+ ins_cost(LOAD_COST); -+ format %{ "lw $dst, $mem\t# int, #@loadI" %} ++ 
ins_cost(STORE_COST); ++ format %{ "fsd $src, $mem\t# double, #@storeD" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); -+ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(fp_store_reg_d); +%} + -+// Load Integer (32 bit signed) into long -+instruct loadI2L(iRegLNoSp dst, memory mem) ++// Store Compressed Klass Pointer ++instruct storeNKlass(iRegN src, memory mem) +%{ -+ match(Set dst (ConvI2L (LoadI mem))); ++ match(Set mem (StoreNKlass mem src)); + -+ ins_cost(LOAD_COST); -+ format %{ "lw $dst, $mem\t# int, #@loadI2L" %} ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); -+ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(istore_reg_mem); +%} + -+// Load Integer (32 bit unsigned) into long -+instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) ++// ============================================================================ ++// Atomic operation instructions ++// ++// Intel and SPARC both implement Ideal Node LoadPLocked and ++// Store{PIL}Conditional instructions using a normal load for the ++// LoadPLocked and a CAS for the Store{PIL}Conditional. ++// ++// The ideal code appears only to use LoadPLocked/storePConditional as a ++// pair to lock object allocations from Eden space when not using ++// TLABs. ++// ++// There does not appear to be a Load{IL}Locked Ideal Node and the ++// Ideal code appears to use Store{IL}Conditional as an alias for CAS ++// and to use StoreIConditional only for 32-bit and StoreLConditional ++// only for 64-bit. ++// ++// We implement LoadPLocked and storePConditional instructions using, ++// respectively the RISCV hw load-reserve and store-conditional ++// instructions. Whereas we must implement each of ++// Store{IL}Conditional using a CAS which employs a pair of ++// instructions comprising a load-reserve followed by a ++// store-conditional. ++ ++ ++// Locked-load (load reserved) of the current heap-top ++// used when updating the eden heap top ++// implemented using lr_d on RISCV64 ++instruct loadPLocked(iRegPNoSp dst, indirect mem) +%{ -+ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ match(Set dst (LoadPLocked mem)); + -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# int, #@loadUI2L" %} ++ ins_cost(ALU_COST * 2 + LOAD_COST); ++ ++ format %{ "lr.d $dst, $mem\t# ptr load reserved, #@loadPLocked" %} + + ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ la(t0, Address(as_Register($mem$$base), $mem$$disp)); ++ __ lr_d($dst$$Register, t0, Assembler::aq); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(pipe_serial); +%} + -+// Load Long (64 bit signed) -+instruct loadL(iRegLNoSp dst, memory mem) ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. ++// implemented using sc_d on RISCV64. 
++instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) +%{ -+ match(Set dst (LoadL mem)); ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); + -+ ins_cost(LOAD_COST); -+ format %{ "ld $dst, $mem\t# int, #@loadL" %} ++ ins_cost(ALU_COST * 2 + STORE_COST); ++ ++ format %{ ++ "sc_d t1, $newval $heap_top_ptr,\t# ptr store conditional, #@storePConditional" ++ %} + + ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ la(t0, Address(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp)); ++ __ sc_d($cr$$Register, $newval$$Register, t0, Assembler::rl); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(pipe_serial); +%} + -+// Load Range -+instruct loadRange(iRegINoSp dst, memory mem) ++// storeLConditional is used by PhaseMacroExpand::expand_lock_node ++// when attempting to rebias a lock towards the current thread. We ++// must use the acquire form of cmpxchg in order to guarantee acquire ++// semantics in this case. ++instruct storeLConditional(indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) +%{ -+ match(Set dst (LoadRange mem)); ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# range, #@loadRange" %} ++ ins_cost(LOAD_COST + STORE_COST + 2 * BRANCH_COST); ++ ++ format %{ ++ "cmpxchg t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" ++ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeLConditional" ++ %} + + ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); ++ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+// Load Pointer -+instruct loadP(iRegPNoSp dst, memory mem) ++// storeIConditional also has acquire semantics, for no better reason ++// than matching storeLConditional. 
++instruct storeIConditional(indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) +%{ -+ match(Set dst (LoadP mem)); -+ predicate(n->as_Load()->barrier_data() == 0); ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST); -+ format %{ "ld $dst, $mem\t# ptr, #@loadP" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2); ++ ++ format %{ ++ "cmpxchgw t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" ++ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeIConditional" ++ %} + + ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); ++ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+// Load Compressed Pointer -+instruct loadN(iRegNNoSp dst, memory mem) ++// standard CompareAndSwapX when we are using barriers ++// these have higher priority than the rules selected by a predicate ++instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set dst (LoadN mem)); ++ match(Set res (CompareAndSwapB mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# loadN, compressed ptr, #@loadN" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); ++ ++ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); ++ ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB" ++ %} + + ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+// Load Klass Pointer -+instruct loadKlass(iRegPNoSp dst, memory mem) ++instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set dst (LoadKlass mem)); ++ match(Set res (CompareAndSwapS mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST); -+ format %{ "ld $dst, $mem\t# class, #@loadKlass" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); ++ ++ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); ++ ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapS" ++ %} + + ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+// Load Narrow Klass Pointer -+instruct loadNKlass(iRegNNoSp dst, memory mem) ++instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ -+ match(Set dst (LoadNKlass mem)); ++ match(Set res (CompareAndSwapI mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# loadNKlass, compressed class ptr, #@loadNKlass" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapI" + %} + -+ ins_pipe(iload_reg_mem); ++ ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+// Load Float -+instruct loadF(fRegF dst, memory mem) ++instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ -+ match(Set dst (LoadF mem)); ++ match(Set res (CompareAndSwapL mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST); -+ format %{ "flw $dst, $mem\t# float, #@loadF" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ flw(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapL" + %} + -+ ins_pipe(fp_load_mem_s); ++ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+// Load Double -+instruct loadD(fRegD dst, memory mem) ++instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ match(Set dst (LoadD mem)); ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST); -+ format %{ "fld $dst, $mem\t# double, #@loadD" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapP" + %} + -+ ins_pipe(fp_load_mem_d); ++ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+// Load Int Constant -+instruct loadConI(iRegINoSp dst, immI src) ++instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ -+ match(Set dst src); ++ match(Set res (CompareAndSwapN mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); -+ format %{ "li $dst, $src\t# int, #@loadConI" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); + -+ ins_encode(riscv_enc_li_imm(dst, src)); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapN" ++ %} + -+ ins_pipe(ialu_imm); ++ ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+// Load Long Constant -+instruct loadConL(iRegLNoSp dst, immL src) ++// alternative CompareAndSwapX when we are eliding barriers ++instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set dst src); -+ -+ ins_cost(ALU_COST); -+ format %{ "li $dst, $src\t# long, #@loadConL" %} ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_encode(riscv_enc_li_imm(dst, src)); ++ match(Set res (CompareAndSwapB mem (Binary oldval newval))); + -+ ins_pipe(ialu_imm); -+%} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); + -+// Load Pointer Constant -+instruct loadConP(iRegPNoSp dst, immP con) -+%{ -+ match(Set dst con); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# ptr, #@loadConP" %} ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq" ++ %} + -+ ins_encode(riscv_enc_mov_p(dst, con)); ++ ins_encode %{ ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ %} + -+ ins_pipe(ialu_imm); ++ ins_pipe(pipe_slow); +%} + -+// Load Null Pointer Constant -+instruct loadConP0(iRegPNoSp dst, immP0 con) ++instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set dst con); -+ -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# NULL ptr, #@loadConP0" %} ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_encode(riscv_enc_mov_zero(dst)); ++ match(Set res (CompareAndSwapS mem (Binary oldval newval))); + -+ ins_pipe(ialu_imm); -+%} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); + -+// Load Pointer Constant One -+instruct loadConP1(iRegPNoSp dst, immP_1 con) -+%{ -+ match(Set dst con); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# load ptr constant one, #@loadConP1" %} ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapSAcq" ++ %} + -+ ins_encode(riscv_enc_mov_p1(dst)); ++ ins_encode %{ ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ %} + -+ ins_pipe(ialu_imm); ++ ins_pipe(pipe_slow); +%} + -+// Load Byte Map Base Constant -+instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) ++instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ -+ match(Set dst con); -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# Byte Map Base, #@loadByteMapBase" %} -+ -+ ins_encode(riscv_enc_mov_byte_map_base(dst)); ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_pipe(ialu_imm); -+%} ++ match(Set res (CompareAndSwapI mem (Binary oldval newval))); + -+// Load Narrow Pointer Constant -+instruct loadConN(iRegNNoSp dst, immN con) -+%{ -+ match(Set dst con); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_cost(ALU_COST * 4); -+ format %{ "mv $dst, $con\t# compressed ptr, #@loadConN" %} ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapIAcq" ++ %} + -+ ins_encode(riscv_enc_mov_n(dst, con)); ++ ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval)); + -+ ins_pipe(ialu_imm); ++ ins_pipe(pipe_slow); +%} + -+// Load Narrow Null Pointer Constant -+instruct loadConN0(iRegNNoSp dst, immN0 con) ++instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ -+ match(Set dst con); ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# compressed NULL ptr, #@loadConN0" %} ++ match(Set res (CompareAndSwapL mem (Binary oldval newval))); + -+ ins_encode(riscv_enc_mov_zero(dst)); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_pipe(ialu_imm); -+%} ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq" ++ %} + -+// Load Narrow Klass Constant -+instruct loadConNKlass(iRegNNoSp dst, immNKlass con) -+%{ -+ match(Set dst con); ++ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); + -+ ins_cost(ALU_COST * 6); -+ format %{ "mv $dst, $con\t# compressed klass ptr, #@loadConNKlass" %} ++ ins_pipe(pipe_slow); ++%} + -+ ins_encode(riscv_enc_mov_nk(dst, con)); ++instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_pipe(ialu_imm); -+%} ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); + -+// Load Float Constant -+instruct loadConF(fRegF dst, immF con) %{ -+ match(Set dst con); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_cost(LOAD_COST); + format %{ -+ "flw $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConF" ++ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapPAcq" + %} + -+ ins_encode %{ -+ __ flw(as_FloatRegister($dst$$reg), $constantaddress($con)); -+ %} ++ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); + -+ ins_pipe(fp_load_constant_s); ++ ins_pipe(pipe_slow); +%} + -+instruct loadConF0(fRegF dst, immF0 con) %{ -+ match(Set dst con); ++instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(XFER_COST); ++ match(Set res (CompareAndSwapN mem (Binary oldval newval))); + -+ format %{ "fmv.w.x $dst, zr\t# float, #@loadConF0" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ fmv_w_x(as_FloatRegister($dst$$reg), zr); ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq" + %} + -+ ins_pipe(fp_load_constant_s); ++ ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+// Load Double Constant -+instruct loadConD(fRegD dst, immD con) %{ -+ match(Set dst con); ++// Sundry CAS operations. Note that release is always true, ++// regardless of the memory ordering of the CAS. This is because we ++// need the volatile case to be sequentially consistent but there is ++// no trailing StoreLoad barrier emitted by C2. Unfortunately we ++// can't check the type of memory ordering here, so we always emit a ++// sc_d(w) with rl bit set. ++instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ ins_cost(LOAD_COST); + format %{ -+ "fld $dst, [$constantaddress]\t# load from constant table: double=$con, #@loadConD" ++ "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB" + %} + + ins_encode %{ -+ __ fld(as_FloatRegister($dst$$reg), $constantaddress($con)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(fp_load_constant_d); ++ ins_pipe(pipe_slow); +%} + -+instruct loadConD0(fRegD dst, immD0 con) %{ -+ match(Set dst con); ++instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + -+ ins_cost(XFER_COST); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); + -+ format %{ "fmv.d.x $dst, zr\t# double, #@loadConD0" %} ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ ins_encode %{ -+ __ fmv_d_x(as_FloatRegister($dst$$reg), zr); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS" + %} + -+ ins_pipe(fp_load_constant_d); -+%} -+ -+// Store Instructions -+// Store CMS card-mark Immediate -+instruct storeimmCM0(immI0 zero, memory mem) 
-+%{ -+ match(Set mem (StoreCM mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "storestore (elided)\n\t" -+ "sb zr, $mem\t# byte, #@storeimmCM0" %} -+ + ins_encode %{ -+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(istore_mem); ++ ins_pipe(pipe_slow); +%} + -+// Store CMS card-mark Immediate with intervening StoreStore -+// needed when using CMS with no conditional card marking -+instruct storeimmCM0_ordered(immI0 zero, memory mem) ++instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ -+ match(Set mem (StoreCM mem zero)); -+ -+ ins_cost(ALU_COST + STORE_COST); -+ format %{ "membar(StoreStore)\n\t" -+ "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %} -+ -+ ins_encode %{ -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); + -+ ins_pipe(istore_mem); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+// Store Byte -+instruct storeB(iRegIorL2I src, memory mem) -+%{ -+ match(Set mem (StoreB mem src)); ++ effect(TEMP_DEF res); + -+ ins_cost(STORE_COST); -+ format %{ "sb $src, $mem\t# byte, #@storeB" %} ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI" ++ %} + + ins_encode %{ -+ __ sb(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(istore_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+instruct storeimmB0(immI0 zero, memory mem) ++instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ -+ match(Set mem (StoreB mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sb zr, $mem\t# byte, #@storeimmB0" %} -+ -+ ins_encode %{ -+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); + -+ ins_pipe(istore_mem); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+// Store Char/Short -+instruct storeC(iRegIorL2I src, memory mem) -+%{ -+ match(Set mem (StoreC mem src)); ++ effect(TEMP_DEF res); + -+ ins_cost(STORE_COST); -+ format %{ "sh $src, $mem\t# short, #@storeC" %} ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL" ++ %} + + ins_encode %{ -+ __ sh(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(istore_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+instruct storeimmC0(immI0 zero, memory mem) ++instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ -+ match(Set mem (StoreC mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sh zr, $mem\t# short, #@storeimmC0" %} -+ -+ ins_encode %{ -+ __ sh(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ 
match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + -+ ins_pipe(istore_mem); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3); + -+// Store Integer -+instruct storeI(iRegIorL2I src, memory mem) -+%{ -+ match(Set mem(StoreI mem src)); ++ effect(TEMP_DEF res); + -+ ins_cost(STORE_COST); -+ format %{ "sw $src, $mem\t# int, #@storeI" %} ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN" ++ %} + + ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(istore_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+instruct storeimmI0(immI0 zero, memory mem) ++instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ match(Set mem(StoreI mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sw zr, $mem\t# int, #@storeimmI0" %} -+ -+ ins_encode %{ -+ __ sw(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + -+ ins_pipe(istore_mem); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+// Store Long (64 bit signed) -+instruct storeL(iRegL src, memory mem) -+%{ -+ match(Set mem (StoreL mem src)); ++ effect(TEMP_DEF res); + -+ ins_cost(STORE_COST); -+ format %{ "sd $src, $mem\t# long, #@storeL" %} ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP" ++ %} + + ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(istore_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+// Store Long (64 bit signed) -+instruct storeimmL0(immL0 zero, memory mem) ++instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set mem (StoreL mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sd zr, $mem\t# long, #@storeimmL0" %} ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_encode %{ -+ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); + -+ ins_pipe(istore_mem); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); + -+// Store Pointer -+instruct storeP(iRegP src, memory mem) -+%{ -+ match(Set mem (StoreP mem src)); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ ins_cost(STORE_COST); -+ format %{ "sd $src, $mem\t# ptr, #@storeP" %} ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq" ++ %} + + ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::aq, /*release*/ 
Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(istore_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+// Store Pointer -+instruct storeimmP0(immP0 zero, memory mem) ++instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set mem (StoreP mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sd zr, $mem\t# ptr, #@storeimmP0" %} ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_encode %{ -+ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + -+ ins_pipe(istore_mem); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); + -+// Store Compressed Pointer -+instruct storeN(iRegN src, memory mem) -+%{ -+ match(Set mem (StoreN mem src)); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ ins_cost(STORE_COST); -+ format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %} ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq" ++ %} + + ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(istore_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) ++instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ -+ match(Set mem (StoreN mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_encode %{ -+ __ sw(as_Register($heapbase$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); + -+ ins_pipe(istore_reg_mem); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+// Store Float -+instruct storeF(fRegF src, memory mem) -+%{ -+ match(Set mem (StoreF mem src)); ++ effect(TEMP_DEF res); + -+ ins_cost(STORE_COST); -+ format %{ "fsw $src, $mem\t# float, #@storeF" %} ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq" ++ %} + + ins_encode %{ -+ __ fsw(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(fp_store_reg_s); ++ ins_pipe(pipe_slow); +%} + -+// Store Double -+instruct storeD(fRegD src, memory mem) ++instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ -+ match(Set mem (StoreD mem src)); -+ -+ ins_cost(STORE_COST); -+ format %{ "fsd $src, $mem\t# double, #@storeD" %} ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), 
$mem$$disp)); -+ %} ++ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); + -+ ins_pipe(fp_store_reg_d); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+// Store Compressed Klass Pointer -+instruct storeNKlass(iRegN src, memory mem) -+%{ -+ match(Set mem (StoreNKlass mem src)); ++ effect(TEMP_DEF res); + -+ ins_cost(STORE_COST); -+ format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %} ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq" ++ %} + + ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(istore_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+// ============================================================================ -+// Atomic operation instructions -+// -+// Intel and SPARC both implement Ideal Node LoadPLocked and -+// Store{PIL}Conditional instructions using a normal load for the -+// LoadPLocked and a CAS for the Store{PIL}Conditional. -+// -+// The ideal code appears only to use LoadPLocked/storePConditional as a -+// pair to lock object allocations from Eden space when not using -+// TLABs. -+// -+// There does not appear to be a Load{IL}Locked Ideal Node and the -+// Ideal code appears to use Store{IL}Conditional as an alias for CAS -+// and to use StoreIConditional only for 32-bit and StoreLConditional -+// only for 64-bit. -+// -+// We implement LoadPLocked and storePConditional instructions using, -+// respectively the RISCV hw load-reserve and store-conditional -+// instructions. Whereas we must implement each of -+// Store{IL}Conditional using a CAS which employs a pair of -+// instructions comprising a load-reserve followed by a -+// store-conditional. -+ -+ -+// Locked-load (load reserved) of the current heap-top -+// used when updating the eden heap top -+// implemented using lr_d on RISCV64 -+instruct loadPLocked(iRegPNoSp dst, indirect mem) ++instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ -+ match(Set dst (LoadPLocked mem)); -+ -+ ins_cost(ALU_COST * 2 + LOAD_COST); -+ -+ format %{ "lr.d $dst, $mem\t# ptr load reserved, #@loadPLocked" %} -+ -+ ins_encode %{ -+ __ la(t0, Address(as_Register($mem$$base), $mem$$disp)); -+ __ lr_d($dst$$Register, t0, Assembler::aq); -+ %} ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_pipe(pipe_serial); -+%} ++ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + -+// Conditional-store of the updated heap-top. -+// Used during allocation of the shared heap. -+// implemented using sc_d on RISCV64. 
-+instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) -+%{ -+ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+ ins_cost(ALU_COST * 2 + STORE_COST); ++ effect(TEMP_DEF res); + + format %{ -+ "sc_d t1, $newval $heap_top_ptr,\t# ptr store conditional, #@storePConditional" ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq" + %} + + ins_encode %{ -+ __ la(t0, Address(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp)); -+ __ sc_d($cr$$Register, $newval$$Register, t0, Assembler::rl); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_slow); +%} + -+instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) ++instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(LOAD_COST + STORE_COST + 2 * BRANCH_COST); ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); + + format %{ -+ "cmpxchg t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" -+ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeLConditional" ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); -+ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + -+// storeIConditional also has acquire semantics, for no better reason -+// than matching storeLConditional. 
-+instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) ++instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ -+ "cmpxchgw t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" -+ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeIConditional" ++ "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapB" + %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); -+ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + -+// standard CompareAndSwapX when we are using barriers -+// these have higher priority than the rules selected by a predicate -+instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set res (CompareAndSwapB mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); + -+ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapB" ++ "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapS" + %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ -+ match(Set res (CompareAndSwapS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); ++ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); + -+ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS" ++ "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapI" + %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ -+ match(Set res (CompareAndSwapI mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapI" ++ "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapL" + %} + -+ ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); ++ %} + + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ -+ match(Set res (CompareAndSwapL mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); + + format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapL" ++ "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapN" + %} + -+ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); ++ %} + + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ predicate(n->as_LoadStore()->barrier_data() == 0); -+ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapP" ++ "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapP" + %} + -+ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); ++ %} + + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) ++instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set res (CompareAndSwapN mem (Binary oldval newval))); ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); ++ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapN" ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapBAcq" + %} + -+ ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); ++ %} + + ins_pipe(pipe_slow); +%} + -+// alternative CompareAndSwapX when we are eliding barriers -+instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (CompareAndSwapB mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapBAcq" ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapSAcq" + %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (CompareAndSwapS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); ++ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq" ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapIAcq" + %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (CompareAndSwapI mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapIAcq" ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapLAcq" + %} + -+ ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); ++ %} + + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (CompareAndSwapL mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); + + format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq" ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapNAcq" + %} + -+ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) -+%{ -+ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); -+ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); -+ -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapPAcq" ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); -+ + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) ++instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (CompareAndSwapN mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapNAcq" ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapPAcq" + %} + -+ ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); ++ %} + + ins_pipe(pipe_slow); +%} + -+// Sundry CAS operations. Note that release is always true, -+// regardless of the memory ordering of the CAS. This is because we -+// need the volatile case to be sequentially consistent but there is -+// no trailing StoreLoad barrier emitted by C2. Unfortunately we -+// can't check the type of memory ordering here, so we always emit a -+// sc_d(w) with rl bit set. -+instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) +%{ -+ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); ++ match(Set prev (GetAndSetI mem newv)); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB" -+ %} ++ format %{ "atomic_xchgw $prev, $newv, [$mem]\t#@get_and_setI" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) +%{ -+ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); ++ match(Set prev (GetAndSetL mem newv)); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS" -+ %} ++ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setL" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) +%{ -+ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + 
ALU_COST); ++ match(Set prev (GetAndSetN mem newv)); + -+ effect(TEMP_DEF res); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI" -+ %} ++ format %{ "atomic_xchgwu $prev, $newv, [$mem]\t#@get_and_setN" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ atomic_xchgwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) +%{ -+ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ match(Set prev (GetAndSetP mem newv)); + -+ effect(TEMP_DEF res); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL" -+ %} ++ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setP" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) +%{ -+ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3); ++ match(Set prev (GetAndSetI mem newv)); + -+ effect(TEMP_DEF res); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN" -+ %} ++ format %{ "atomic_xchgw_acq $prev, $newv, [$mem]\t#@get_and_setIAcq" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) +%{ -+ predicate(n->as_LoadStore()->barrier_data() == 0); -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ match(Set prev (GetAndSetL mem newv)); + -+ effect(TEMP_DEF res); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP" -+ %} ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setLAcq" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ atomic_xchgal($prev$$Register, $newv$$Register, 
as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); ++ match(Set prev (GetAndSetN mem newv)); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq" -+ %} ++ format %{ "atomic_xchgwu_acq $prev, $newv, [$mem]\t#@get_and_setNAcq" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ atomic_xchgalwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); ++ match(Set prev (GetAndSetP mem newv)); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq" -+ %} ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setPAcq" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) +%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ match(Set newval (GetAndAddL mem incr)); + -+ effect(TEMP_DEF res); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq" -+ %} ++ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addL" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + 
-+instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) +%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); ++ predicate(n->as_LoadStore()->result_not_used()); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ match(Set dummy (GetAndAddL mem incr)); + -+ effect(TEMP_DEF res); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq" -+ %} ++ format %{ "get_and_addL [$mem], $incr\t#@get_and_addL_no_res" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAdd incr) +%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ match(Set newval (GetAndAddL mem incr)); + -+ effect(TEMP_DEF res); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq" -+ %} ++ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addLi" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAdd incr) +%{ -+ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); -+ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ predicate(n->as_LoadStore()->result_not_used()); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ match(Set dummy (GetAndAddL mem incr)); + -+ effect(TEMP_DEF res); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq" -+ %} ++ format %{ "get_and_addL [$mem], $incr\t#@get_and_addLi_no_res" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) +%{ -+ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); ++ match(Set newval (GetAndAddI mem incr)); + -+ 
effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapB" -+ %} ++ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addI" %} + + ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) +%{ -+ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); ++ predicate(n->as_LoadStore()->result_not_used()); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); ++ match(Set dummy (GetAndAddI mem incr)); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapS" -+ %} ++ format %{ "get_and_addI [$mem], $incr\t#@get_and_addI_no_res" %} + + ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAdd incr) +%{ -+ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); ++ match(Set newval (GetAndAddI mem incr)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapI" -+ %} ++ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addIi" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr) +%{ -+ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); ++ predicate(n->as_LoadStore()->result_not_used()); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ match(Set dummy (GetAndAddI 
mem incr)); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapL" -+ %} ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI [$mem], $incr\t#@get_and_addIi_no_res" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) +%{ -+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ match(Set newval (GetAndAddL mem incr)); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapN" -+ %} ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLAcq" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) -+%{ -+ predicate(n->as_LoadStore()->barrier_data() == 0); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ match(Set dummy (GetAndAddL mem incr)); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapP" -+ %} ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addL_no_resAcq" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); ++ match(Set newval (GetAndAddL mem incr)); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(ALU_COST); + -+ format %{ -+ 
"cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapBAcq" -+ %} ++ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLiAcq" %} + + ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) +%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); ++ match(Set dummy (GetAndAddL mem incr)); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapSAcq" -+ %} ++ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addLi_no_resAcq" %} + + ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); ++ match(Set newval (GetAndAddI mem incr)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapIAcq" -+ %} ++ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIAcq" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) +%{ -+ predicate(needs_acquiring_load_reserved(n)); ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ match(Set res (WeakCompareAndSwapL 
mem (Binary oldval newval))); ++ match(Set dummy (GetAndAddI mem incr)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapLAcq" -+ %} ++ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addI_no_resAcq" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ++ match(Set newval (GetAndAddI mem incr)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapNAcq" -+ %} ++ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIiAcq" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr) +%{ -+ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ match(Set dummy (GetAndAddI mem incr)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapPAcq" -+ %} ++ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addIi_no_resAcq" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); -+%} -+ -+instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) -+%{ -+ match(Set prev (GetAndSetI mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchgw $prev, $newv, [$mem]\t#@get_and_setI" %} -+ -+ ins_encode %{ -+ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) -+%{ -+ match(Set prev 
(GetAndSetL mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setL" %} -+ -+ ins_encode %{ -+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) -+%{ -+ match(Set prev (GetAndSetN mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchgwu $prev, $newv, [$mem]\t#@get_and_setN" %} -+ -+ ins_encode %{ -+ __ atomic_xchgwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) -+%{ -+ predicate(n->as_LoadStore()->barrier_data() == 0); -+ match(Set prev (GetAndSetP mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setP" %} -+ -+ ins_encode %{ -+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) -+%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set prev (GetAndSetI mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchgw_acq $prev, $newv, [$mem]\t#@get_and_setIAcq" %} -+ -+ ins_encode %{ -+ __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) -+%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set prev (GetAndSetL mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setLAcq" %} -+ -+ ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) -+%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set prev (GetAndSetN mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchgwu_acq $prev, $newv, [$mem]\t#@get_and_setNAcq" %} -+ -+ ins_encode %{ -+ __ atomic_xchgalwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) -+%{ -+ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); -+ -+ match(Set prev (GetAndSetP mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setPAcq" %} -+ -+ ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) -+%{ -+ match(Set newval (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addL" %} -+ -+ ins_encode %{ -+ __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) -+%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL [$mem], $incr\t#@get_and_addL_no_res" %} -+ -+ ins_encode %{ -+ __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAdd incr) 
-+%{ -+ match(Set newval (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addLi" %} -+ -+ ins_encode %{ -+ __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAdd incr) -+%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL [$mem], $incr\t#@get_and_addLi_no_res" %} -+ -+ ins_encode %{ -+ __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) -+%{ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addI" %} -+ -+ ins_encode %{ -+ __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) -+%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI [$mem], $incr\t#@get_and_addI_no_res" %} -+ -+ ins_encode %{ -+ __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAdd incr) -+%{ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addIi" %} -+ -+ ins_encode %{ -+ __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr) -+%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI [$mem], $incr\t#@get_and_addIi_no_res" %} -+ -+ ins_encode %{ -+ __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) -+%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set newval (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLAcq" %} -+ -+ ins_encode %{ -+ __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); -+ -+ match(Set dummy (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addL_no_resAcq" %} -+ -+ ins_encode %{ -+ __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) -+%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set newval (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLiAcq" %} -+ -+ ins_encode %{ -+ __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct 
get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) -+%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); -+ -+ match(Set dummy (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addLi_no_resAcq" %} -+ -+ ins_encode %{ -+ __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) -+%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIAcq" %} -+ -+ ins_encode %{ -+ __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) -+%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addI_no_resAcq" %} -+ -+ ins_encode %{ -+ __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) -+%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIiAcq" %} -+ -+ ins_encode %{ -+ __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr) -+%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addIi_no_resAcq" %} -+ -+ ins_encode %{ -+ __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_serial); +%} + +// ============================================================================ @@ -37285,7 +34798,7 @@ index 00000000000..588887e1d96 +%} + +instruct sqrtF_reg(fRegF dst, fRegF src) %{ -+ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ match(Set dst (SqrtF src)); + + ins_cost(FSQRT_COST); + format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} @@ -37643,7 +35156,6 @@ index 00000000000..588887e1d96 + +instruct membar_storestore() %{ + match(MemBarStoreStore); -+ match(StoreStoreFence); + ins_cost(ALU_COST); + + format %{ "MEMBAR-store-store\t#@membar_storestore" %} @@ -37728,17 +35240,6 @@ index 00000000000..588887e1d96 + ins_pipe(pipe_class_empty); +%} + -+instruct castLL(iRegL dst) -+%{ -+ match(Set dst (CastLL dst)); -+ -+ size(0); -+ format %{ "# castLL of $dst, #@castLL" %} -+ ins_encode(/* empty encoding */); -+ ins_cost(0); -+ ins_pipe(pipe_class_empty); -+%} -+ +instruct castII(iRegI dst) +%{ + match(Set dst (CastII dst)); @@ -37761,39 +35262,6 @@ index 00000000000..588887e1d96 + ins_pipe(pipe_class_empty); +%} + -+instruct castFF(fRegF dst) -+%{ -+ match(Set dst (CastFF dst)); -+ -+ size(0); -+ format %{ "# castFF of $dst" %} -+ ins_encode(/* empty encoding */); -+ ins_cost(0); -+ ins_pipe(pipe_class_empty); -+%} -+ -+instruct castDD(fRegD dst) -+%{ -+ match(Set dst (CastDD dst)); -+ -+ 
size(0); -+ format %{ "# castDD of $dst" %} -+ ins_encode(/* empty encoding */); -+ ins_cost(0); -+ ins_pipe(pipe_class_empty); -+%} -+ -+instruct castVV(vReg dst) -+%{ -+ match(Set dst (CastVV dst)); -+ -+ size(0); -+ format %{ "# castVV of $dst" %} -+ ins_encode(/* empty encoding */); -+ ins_cost(0); -+ ins_pipe(pipe_class_empty); -+%} -+ +// ============================================================================ +// Convert Instructions + @@ -38029,7 +35497,7 @@ index 00000000000..588887e1d96 +// in case of 32bit oops (heap < 4Gb). +instruct convN2I(iRegINoSp dst, iRegN src) +%{ -+ predicate(CompressedOops::shift() == 0); ++ predicate(Universe::narrow_oop_shift() == 0); + match(Set dst (ConvL2I (CastP2X (DecodeN src)))); + + ins_cost(ALU_COST); @@ -38588,7 +36056,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38608,7 +36076,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38667,7 +36135,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38686,7 +36154,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38707,7 +36175,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38727,7 +36195,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38748,7 +36216,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38768,7 +36236,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | 
C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38785,7 +36253,7 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "float_b$cmp $op1, $op2 \t#@cmpF_branch"%} ++ format %{ "float_b$cmp $op1, $op2, $lbl \t#@cmpF_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); @@ -38802,7 +36270,7 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "float_b$cmp $op1, $op2\t#@cmpF_loop"%} ++ format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); @@ -38820,10 +36288,10 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%} ++ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + @@ -38838,10 +36306,10 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%} ++ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + @@ -39113,7 +36581,7 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(BRANCH_COST); -+ format %{ "far_b$cmp $cr, zr, L\t#@far_cmpFlag_branch"%} ++ format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%} + + ins_encode %{ + __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); @@ -39162,7 +36630,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39177,7 +36645,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39220,7 +36688,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39235,7 +36703,7 @@ index 00000000000..588887e1d96 + 
format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39253,7 +36721,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39271,7 +36739,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39289,7 +36757,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39307,7 +36775,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39322,7 +36790,7 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_branch"%} ++ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), @@ -39338,7 +36806,7 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_loop"%} ++ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), @@ -39355,10 +36823,10 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%} ++ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39371,10 +36839,10 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%} ++ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, 
as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39673,10 +37141,8 @@ index 00000000000..588887e1d96 + ins_cost(ALU_COST + BRANCH_COST); + + format %{ -+ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpI\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpI\n\t" ++ %} + + ins_encode %{ + __ enc_cmove($cop$$cmpcode, @@ -39684,7 +37150,7 @@ index 00000000000..588887e1d96 + as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); +%} + +instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) %{ @@ -39692,18 +37158,16 @@ index 00000000000..588887e1d96 + ins_cost(ALU_COST + BRANCH_COST); + + format %{ -+ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpU\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpU\n\t" ++ %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); +%} + +instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) %{ @@ -39711,10 +37175,8 @@ index 00000000000..588887e1d96 + ins_cost(ALU_COST + BRANCH_COST); + + format %{ -+ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpL\n\t" ++ %} + + ins_encode %{ + __ enc_cmove($cop$$cmpcode, @@ -39722,7 +37184,24 @@ index 00000000000..588887e1d96 + as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); ++%} ++ ++instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpUL\n\t" ++ %} ++ ++ ins_encode %{ ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(pipe_class_compare); +%} + +instruct cmovL_cmpL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOp cop) %{ @@ -39730,10 +37209,8 @@ index 00000000000..588887e1d96 + ins_cost(ALU_COST + BRANCH_COST); + + format %{ -+ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpL\n\t" ++ %} + + ins_encode %{ + __ enc_cmove($cop$$cmpcode, @@ -39741,7 +37218,7 @@ index 00000000000..588887e1d96 + as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); +%} + +instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) %{ @@ -39749,38 +37226,51 @@ index 00000000000..588887e1d96 + ins_cost(ALU_COST + BRANCH_COST); + + format %{ -+ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpUL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpUL\n\t" ++ %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode | 
MacroAssembler::unsigned_branch_mask, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); +%} + -+instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src))); ++instruct cmovL_cmpI(iRegLNoSp dst, iRegL src, iRegI op1, iRegI op2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI op1 op2)) (Binary dst src))); + ins_cost(ALU_COST + BRANCH_COST); ++ + format %{ -+ "bneg$cop $op1, $op2\t#@cmovI_cmpUL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpI\n\t" ++ %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); +%} + ++instruct cmovL_cmpU(iRegLNoSp dst, iRegL src, iRegI op1, iRegI op2, cmpOpU cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpU\n\t" ++ %} ++ ++ ins_encode %{ ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(pipe_class_compare); ++%} + +// ============================================================================ +// Procedure Call/Return Instructions @@ -39920,7 +37410,7 @@ index 00000000000..588887e1d96 +instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + @@ -39938,7 +37428,7 @@ index 00000000000..588887e1d96 +instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + @@ -39955,7 +37445,7 @@ index 00000000000..588887e1d96 +instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + @@ -39973,7 +37463,7 @@ index 00000000000..588887e1d96 + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, + rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() 
== StrIntrinsicNode::LU); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + @@ -40119,7 +37609,6 @@ index 00000000000..588887e1d96 + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +%{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + @@ -40133,28 +37622,9 @@ index 00000000000..588887e1d96 +%} + + -+instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) -+%{ -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); -+ -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} -+ ins_encode %{ -+ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, true /* isL */); -+ %} -+ ins_pipe(pipe_class_memory); -+%} -+ +// clearing of an array +instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) +%{ -+ predicate(!UseRVV); + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL cnt, USE_KILL base); + @@ -40174,8 +37644,7 @@ index 00000000000..588887e1d96 + +instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) +%{ -+ predicate(!UseRVV && (uint64_t)n->in(2)->get_long() -+ < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); ++ predicate((uint64_t)n->in(2)->get_long() < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL base, KILL cr); + @@ -40192,7 +37661,7 @@ index 00000000000..588887e1d96 +instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + iRegI_R10 result, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); ++ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + @@ -40208,7 +37677,7 @@ index 00000000000..588887e1d96 +instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + iRegI_R10 result, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); ++ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + @@ -40225,7 +37694,7 @@ index 00000000000..588887e1d96 + iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, + iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); ++ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); + @@ -40242,7 +37711,7 @@ index 00000000000..588887e1d96 + iRegP_R13 tmp1, 
iRegP_R14 tmp2, iRegP_R15 tmp3, + iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); ++ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); + @@ -40455,10 +37924,10 @@ index 00000000000..588887e1d96 +// End: diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad new file mode 100644 -index 00000000000..4488c1c4031 +index 0000000000..7dda004cd3 --- /dev/null +++ b/src/hotspot/cpu/riscv/riscv_b.ad -@@ -0,0 +1,527 @@ +@@ -0,0 +1,466 @@ +// +// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. @@ -40486,88 +37955,12 @@ index 00000000000..4488c1c4031 + +// RISCV Bit-Manipulation Extension Architecture Description File + -+instruct rorI_imm_rvb(iRegINoSp dst, iRegI src, immI shift) %{ -+ predicate(UseRVB); -+ match(Set dst (RotateRight src shift)); -+ -+ format %{ "roriw $dst, $src, ($shift & 0x1f)\t#@rorI_imm_rvb" %} -+ -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x1f); -+ %} -+ -+ ins_pipe(ialu_reg_shift); -+%} -+ -+instruct rorL_imm_rvb(iRegLNoSp dst, iRegL src, immI shift) %{ -+ predicate(UseRVB); -+ match(Set dst (RotateRight src shift)); -+ -+ format %{ "rori $dst, $src, ($shift & 0x3f)\t#@rorL_imm_rvb" %} -+ -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rori(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x3f); -+ %} -+ -+ ins_pipe(ialu_reg_shift); -+%} -+ -+instruct rorI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ -+ predicate(UseRVB); -+ match(Set dst (RotateRight src shift)); -+ -+ format %{ "rorw $dst, $src, $shift\t#@rorI_reg_rvb" %} -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -+ %} -+ ins_pipe(ialu_reg_reg); -+%} -+ -+instruct rorL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ -+ predicate(UseRVB); -+ match(Set dst (RotateRight src shift)); -+ -+ format %{ "ror $dst, $src, $shift\t#@rorL_reg_rvb" %} -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -+ %} -+ ins_pipe(ialu_reg_reg); -+%} -+ -+instruct rolI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ -+ predicate(UseRVB); -+ match(Set dst (RotateLeft src shift)); -+ -+ format %{ "rolw $dst, $src, $shift\t#@rolI_reg_rvb" %} -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -+ %} -+ ins_pipe(ialu_reg_reg); -+%} -+ -+instruct rolL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ -+ predicate(UseRVB); -+ match(Set dst (RotateLeft src shift)); -+ -+ format %{ "rol $dst, $src, $shift\t#@rolL_reg_rvb" %} -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -+ %} -+ ins_pipe(ialu_reg_reg); -+%} -+ +// Convert oop into int for vectors alignment masking -+instruct convP2I_rvb(iRegINoSp dst, iRegP src) %{ -+ predicate(UseRVB); ++instruct convP2I_b(iRegINoSp dst, iRegP src) %{ ++ predicate(UseZba); + match(Set dst (ConvL2I (CastP2X src))); + -+ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_rvb" %} ++ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_b" %} + + ins_cost(ALU_COST); + 
ins_encode %{ @@ -40578,11 +37971,11 @@ index 00000000000..4488c1c4031 +%} + +// byte to int -+instruct convB2I_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ -+ predicate(UseRVB); ++instruct convB2I_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ ++ predicate(UseZbb); + match(Set dst (RShiftI (LShiftI src lshift) rshift)); + -+ format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_rvb" %} ++ format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_b" %} + + ins_cost(ALU_COST); + ins_encode %{ @@ -40593,11 +37986,11 @@ index 00000000000..4488c1c4031 +%} + +// int to short -+instruct convI2S_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ -+ predicate(UseRVB); ++instruct convI2S_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ ++ predicate(UseZbb); + match(Set dst (RShiftI (LShiftI src lshift) rshift)); + -+ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_rvb" %} ++ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_b" %} + + ins_cost(ALU_COST); + ins_encode %{ @@ -40608,11 +38001,11 @@ index 00000000000..4488c1c4031 +%} + +// short to unsigned int -+instruct convS2UI_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ -+ predicate(UseRVB); ++instruct convS2UI_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ ++ predicate(UseZbb); + match(Set dst (AndI src mask)); + -+ format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_rvb" %} ++ format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_b" %} + + ins_cost(ALU_COST); + ins_encode %{ @@ -40623,11 +38016,11 @@ index 00000000000..4488c1c4031 +%} + +// int to unsigned long (zero extend) -+instruct convI2UL_reg_reg_rvb(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ -+ predicate(UseRVB); ++instruct convI2UL_reg_reg_b(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ ++ predicate(UseZba); + match(Set dst (AndL (ConvI2L src) mask)); + -+ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_rvb" %} ++ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_b" %} + + ins_cost(ALU_COST); + ins_encode %{ @@ -40638,12 +38031,12 @@ index 00000000000..4488c1c4031 +%} + +// BSWAP instructions -+instruct bytes_reverse_int_rvb(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseRVB); ++instruct bytes_reverse_int_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); + match(Set dst (ReverseBytesI src)); + + ins_cost(ALU_COST * 2); -+ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_rvb" %} ++ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_b" %} + + ins_encode %{ + __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40652,12 +38045,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg); +%} + -+instruct bytes_reverse_long_rvb(iRegLNoSp dst, iRegL src) %{ -+ predicate(UseRVB); ++instruct bytes_reverse_long_b(iRegLNoSp dst, iRegL src) %{ ++ predicate(UseZbb); + match(Set dst (ReverseBytesL src)); + + ins_cost(ALU_COST); -+ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_rvb" %} ++ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_b" %} + + ins_encode %{ + __ rev8(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40666,12 +38059,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg); +%} + -+instruct bytes_reverse_unsigned_short_rvb(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseRVB); ++instruct bytes_reverse_unsigned_short_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); + match(Set dst (ReverseBytesUS src)); + + ins_cost(ALU_COST * 2); -+ format %{ 
"revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_rvb" %} ++ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_b" %} + + ins_encode %{ + __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40680,12 +38073,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg); +%} + -+instruct bytes_reverse_short_rvb(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseRVB); ++instruct bytes_reverse_short_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); + match(Set dst (ReverseBytesS src)); + + ins_cost(ALU_COST * 2); -+ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_rvb" %} ++ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_b" %} + + ins_encode %{ + __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40695,12 +38088,12 @@ index 00000000000..4488c1c4031 +%} + +// Shift Add Pointer -+instruct shaddP_reg_reg_rvb(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ -+ predicate(UseRVB); ++instruct shaddP_reg_reg_b(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ ++ predicate(UseZba); + match(Set dst (AddP src1 (LShiftL src2 imm))); + + ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_rvb" %} ++ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_b" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), @@ -40713,12 +38106,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg_reg); +%} + -+instruct shaddP_reg_reg_ext_rvb(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ -+ predicate(UseRVB); ++instruct shaddP_reg_reg_ext_b(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ ++ predicate(UseZba); + match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm))); + + ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_rvb" %} ++ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_b" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), @@ -40732,12 +38125,12 @@ index 00000000000..4488c1c4031 +%} + +// Shift Add Long -+instruct shaddL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ -+ predicate(UseRVB); ++instruct shaddL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ ++ predicate(UseZba); + match(Set dst (AddL src1 (LShiftL src2 imm))); + + ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_rvb" %} ++ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_b" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), @@ -40750,12 +38143,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg_reg); +%} + -+instruct shaddL_reg_reg_ext_rvb(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ -+ predicate(UseRVB); ++instruct shaddL_reg_reg_ext_b(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ ++ predicate(UseZba); + match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm))); + + ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_rvb" %} ++ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_b" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), @@ -40769,12 +38162,12 @@ index 00000000000..4488c1c4031 +%} + +// Zeros Count instructions -+instruct countLeadingZerosI_rvb(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseRVB); ++instruct countLeadingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); + match(Set dst (CountLeadingZerosI src)); + + ins_cost(ALU_COST); -+ format %{ "clzw $dst, $src\t#@countLeadingZerosI_rvb" %} ++ format %{ "clzw $dst, 
$src\t#@countLeadingZerosI_b" %} + + ins_encode %{ + __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40783,12 +38176,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg); +%} + -+instruct countLeadingZerosL_rvb(iRegINoSp dst, iRegL src) %{ -+ predicate(UseRVB); ++instruct countLeadingZerosL_b(iRegINoSp dst, iRegL src) %{ ++ predicate(UseZbb); + match(Set dst (CountLeadingZerosL src)); + + ins_cost(ALU_COST); -+ format %{ "clz $dst, $src\t#@countLeadingZerosL_rvb" %} ++ format %{ "clz $dst, $src\t#@countLeadingZerosL_b" %} + + ins_encode %{ + __ clz(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40797,12 +38190,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg); +%} + -+instruct countTrailingZerosI_rvb(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseRVB); ++instruct countTrailingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); + match(Set dst (CountTrailingZerosI src)); + + ins_cost(ALU_COST); -+ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_rvb" %} ++ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_b" %} + + ins_encode %{ + __ ctzw(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40811,12 +38204,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg); +%} + -+instruct countTrailingZerosL_rvb(iRegINoSp dst, iRegL src) %{ -+ predicate(UseRVB); ++instruct countTrailingZerosL_b(iRegINoSp dst, iRegL src) %{ ++ predicate(UseZbb); + match(Set dst (CountTrailingZerosL src)); + + ins_cost(ALU_COST); -+ format %{ "ctz $dst, $src\t#@countTrailingZerosL_rvb" %} ++ format %{ "ctz $dst, $src\t#@countTrailingZerosL_b" %} + + ins_encode %{ + __ ctz(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40826,12 +38219,12 @@ index 00000000000..4488c1c4031 +%} + +// Population Count instructions -+instruct popCountI_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++instruct popCountI_b(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI src)); + + ins_cost(ALU_COST); -+ format %{ "cpopw $dst, $src\t#@popCountI_rvb" %} ++ format %{ "cpopw $dst, $src\t#@popCountI_b" %} + + ins_encode %{ + __ cpopw(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40841,12 +38234,12 @@ index 00000000000..4488c1c4031 +%} + +// Note: Long/bitCount(long) returns an int. 
-+instruct popCountL_rvb(iRegINoSp dst, iRegL src) %{ ++instruct popCountL_b(iRegINoSp dst, iRegL src) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL src)); + + ins_cost(ALU_COST); -+ format %{ "cpop $dst, $src\t#@popCountL_rvb" %} ++ format %{ "cpop $dst, $src\t#@popCountL_b" %} + + ins_encode %{ + __ cpop(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40856,12 +38249,12 @@ index 00000000000..4488c1c4031 +%} + +// Max and Min -+instruct minI_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ predicate(UseRVB); ++instruct minI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ predicate(UseZbb); + match(Set dst (MinI src1 src2)); + + ins_cost(ALU_COST); -+ format %{ "min $dst, $src1, $src2\t#@minI_reg_rvb" %} ++ format %{ "min $dst, $src1, $src2\t#@minI_reg_b" %} + + ins_encode %{ + __ min(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -40870,12 +38263,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg_reg); +%} + -+instruct maxI_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ predicate(UseRVB); ++instruct maxI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ predicate(UseZbb); + match(Set dst (MaxI src1 src2)); + + ins_cost(ALU_COST); -+ format %{ "max $dst, $src1, $src2\t#@maxI_reg_rvb" %} ++ format %{ "max $dst, $src1, $src2\t#@maxI_reg_b" %} + + ins_encode %{ + __ max(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -40885,14 +38278,14 @@ index 00000000000..4488c1c4031 +%} + +// Abs -+instruct absI_reg_rvb(iRegINoSp dst, iRegI src) %{ -+ predicate(UseRVB); ++instruct absI_reg_b(iRegINoSp dst, iRegI src) %{ ++ predicate(UseZbb); + match(Set dst (AbsI src)); + + ins_cost(ALU_COST * 2); + format %{ + "negw t0, $src\n\t" -+ "max $dst, $src, t0\t#@absI_reg_rvb" ++ "max $dst, $src, t0\t#@absI_reg_b" + %} + + ins_encode %{ @@ -40903,14 +38296,14 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg_reg); +%} + -+instruct absL_reg_rvb(iRegLNoSp dst, iRegL src) %{ -+ predicate(UseRVB); ++instruct absL_reg_b(iRegLNoSp dst, iRegL src) %{ ++ predicate(UseZbb); + match(Set dst (AbsL src)); + + ins_cost(ALU_COST * 2); + format %{ + "neg t0, $src\n\t" -+ "max $dst, $src, t0\t#@absL_reg_rvb" ++ "max $dst, $src, t0\t#@absL_reg_b" + %} + + ins_encode %{ @@ -40922,12 +38315,12 @@ index 00000000000..4488c1c4031 +%} + +// And Not -+instruct andnI_reg_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ -+ predicate(UseRVB); ++instruct andnI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ ++ predicate(UseZbb); + match(Set dst (AndI src1 (XorI src2 m1))); + + ins_cost(ALU_COST); -+ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_rvb" %} ++ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_b" %} + + ins_encode %{ + __ andn(as_Register($dst$$reg), @@ -40938,12 +38331,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg_reg); +%} + -+instruct andnL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ -+ predicate(UseRVB); ++instruct andnL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ ++ predicate(UseZbb); + match(Set dst (AndL src1 (XorL src2 m1))); + + ins_cost(ALU_COST); -+ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_rvb" %} ++ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_b" %} + + ins_encode %{ + __ andn(as_Register($dst$$reg), @@ -40955,12 +38348,12 @@ index 00000000000..4488c1c4031 +%} + +// Or Not -+instruct ornI_reg_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ -+ predicate(UseRVB); ++instruct 
ornI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ ++ predicate(UseZbb); + match(Set dst (OrI src1 (XorI src2 m1))); + + ins_cost(ALU_COST); -+ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_rvb" %} ++ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_b" %} + + ins_encode %{ + __ orn(as_Register($dst$$reg), @@ -40971,12 +38364,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg_reg); +%} + -+instruct ornL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ -+ predicate(UseRVB); ++instruct ornL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ ++ predicate(UseZbb); + match(Set dst (OrL src1 (XorL src2 m1))); + + ins_cost(ALU_COST); -+ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_rvb" %} ++ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_b" %} + + ins_encode %{ + __ orn(as_Register($dst$$reg), @@ -40985,6723 +38378,4615 @@ index 00000000000..4488c1c4031 + %} + + ins_pipe(ialu_reg_reg); -+%} -\ No newline at end of file -diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad -new file mode 100644 -index 00000000000..3828e096b21 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/riscv_v.ad -@@ -0,0 +1,2065 @@ -+// -+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2020, Arm Limited. All rights reserved. -+// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. 
-+// -+// -+ -+// RISCV Vector Extension Architecture Description File -+ -+opclass vmemA(indirect); -+ -+source_hpp %{ -+ bool op_vec_supported(int opcode); -+%} -+ -+source %{ -+ -+ static void loadStore(C2_MacroAssembler masm, bool is_store, -+ VectorRegister reg, BasicType bt, Register base) { -+ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -+ masm.vsetvli(t0, x0, sew); -+ if (is_store) { -+ masm.vsex_v(reg, base, sew); -+ } else { -+ masm.vlex_v(reg, base, sew); -+ } -+ } -+ -+ bool op_vec_supported(int opcode) { -+ switch (opcode) { -+ // No multiply reduction instructions -+ case Op_MulReductionVD: -+ case Op_MulReductionVF: -+ case Op_MulReductionVI: -+ case Op_MulReductionVL: -+ // Others -+ case Op_Extract: -+ case Op_ExtractB: -+ case Op_ExtractC: -+ case Op_ExtractD: -+ case Op_ExtractF: -+ case Op_ExtractI: -+ case Op_ExtractL: -+ case Op_ExtractS: -+ case Op_ExtractUB: -+ // Vector API specific -+ case Op_AndReductionV: -+ case Op_OrReductionV: -+ case Op_XorReductionV: -+ case Op_LoadVectorGather: -+ case Op_StoreVectorScatter: -+ case Op_VectorBlend: -+ case Op_VectorCast: -+ case Op_VectorCastB2X: -+ case Op_VectorCastD2X: -+ case Op_VectorCastF2X: -+ case Op_VectorCastI2X: -+ case Op_VectorCastL2X: -+ case Op_VectorCastS2X: -+ case Op_VectorInsert: -+ case Op_VectorLoadConst: -+ case Op_VectorLoadMask: -+ case Op_VectorLoadShuffle: -+ case Op_VectorMaskCmp: -+ case Op_VectorRearrange: -+ case Op_VectorReinterpret: -+ case Op_VectorStoreMask: -+ case Op_VectorTest: -+ return false; -+ default: -+ return UseRVV; -+ } -+ } -+ -+%} + -+definitions %{ -+ int_def VEC_COST (200, 200); +%} + -+// All VEC instructions -+ -+// vector load/store -+instruct loadV(vReg dst, vmemA mem) %{ -+ match(Set dst (LoadVector mem)); -+ ins_cost(VEC_COST); -+ format %{ "vle $dst, $mem\t#@loadV" %} -+ ins_encode %{ -+ VectorRegister dst_reg = as_VectorRegister($dst$$reg); -+ loadStore(C2_MacroAssembler(&cbuf), false, dst_reg, -+ Matcher::vector_element_basic_type(this), as_Register($mem$$base)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// AndI 0b0..010..0 + ConvI2B ++instruct convI2Bool_andI_reg_immIpowerOf2(iRegINoSp dst, iRegIorL2I src, immIpowerOf2 mask) %{ ++ predicate(UseZbs); ++ match(Set dst (Conv2B (AndI src mask))); ++ ins_cost(ALU_COST); + -+instruct storeV(vReg src, vmemA mem) %{ -+ match(Set mem (StoreVector mem src)); -+ ins_cost(VEC_COST); -+ format %{ "vse $src, $mem\t#@storeV" %} ++ format %{ "bexti $dst, $src, $mask\t#@convI2Bool_andI_reg_immIpowerOf2" %} + ins_encode %{ -+ VectorRegister src_reg = as_VectorRegister($src$$reg); -+ loadStore(C2_MacroAssembler(&cbuf), true, src_reg, -+ Matcher::vector_element_basic_type(this, $src), as_Register($mem$$base)); ++ __ bexti($dst$$Register, $src$$Register, exact_log2((juint)($mask$$constant))); + %} -+ ins_pipe(pipe_slow); -+%} + -+// vector abs -+ -+instruct vabsB(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVB src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t" -+ "vmax.vv $dst, $tmp, $src" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg_reg); +%} +\ No newline at end of file +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +new file mode 100644 +index 0000000000..f41a496093 
+--- /dev/null ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -0,0 +1,2666 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+instruct vabsS(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVS src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t" -+ "vmax.vv $dst, $tmp, $src" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "logging/log.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/align.hpp" ++#include "utilities/formatBuffer.hpp" ++#include "vmreg_riscv.inline.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" ++#endif ++#ifdef COMPILER2 ++#include "adfiles/ad_riscv.hpp" ++#include "opto/runtime.hpp" ++#endif + -+instruct vabsI(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVI src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t" -+ "vmax.vv $dst, $tmp, $src" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++#define __ masm-> + -+instruct vabsL(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVL src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi 
$tmp, 0, $src\t#@vabsL\n\t" -+ "vmax.vv $dst, $tmp, $src" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; + -+instruct vabsF(vReg dst, vReg src) %{ -+ match(Set dst (AbsVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++class SimpleRuntimeFrame { ++public: + -+instruct vabsD(vReg dst, vReg src) %{ -+ match(Set dst (AbsVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Most of the runtime stubs have this simple frame layout. ++ // This class exists to make the layout shared in one place. ++ // Offsets are for compiler stack slots, which are jints. ++ enum layout { ++ // The frame sender code expects that fp will be in the "natural" place and ++ // will override any oopMap setting for it. We must therefore force the layout ++ // so that it agrees with the frame sender code. ++ // we don't expect any arg reg save area so riscv asserts that ++ // frame::arg_reg_save_area_bytes == 0 ++ fp_off = 0, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++}; + -+// vector add ++class RegisterSaver { ++ public: ++ RegisterSaver() {} ++ ~RegisterSaver() {} ++ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); ++ void restore_live_registers(MacroAssembler* masm); + -+instruct vaddB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Offsets into the register save area ++ // Used by deoptimization when it is managing result register ++ // values on its own ++ // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) ++ // |---f0---|<---SP ++ // |---f1---| ++ // | .. | ++ // |---f31--| ++ // |---reserved slot for stack alignment---| ++ // |---x5---| ++ // | x6 | ++ // |---.. 
--| ++ // |---x31--| ++ // |---fp---| ++ // |---ra---| ++ int f0_offset_in_bytes(void) { ++ return 0; ++ } ++ int reserved_slot_offset_in_bytes(void) { ++ return f0_offset_in_bytes() + ++ FloatRegisterImpl::max_slots_per_register * ++ FloatRegisterImpl::number_of_registers * ++ BytesPerInt; ++ } + -+instruct vaddS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ int reg_offset_in_bytes(Register r) { ++ assert (r->encoding() > 4, "ra, sp, gp and tp not saved"); ++ return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize; ++ } + -+instruct vaddI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ int freg_offset_in_bytes(FloatRegister f) { ++ return f0_offset_in_bytes() + f->encoding() * wordSize; ++ } + -+instruct vaddL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ int ra_offset_in_bytes(void) { ++ return reserved_slot_offset_in_bytes() + ++ (RegisterImpl::number_of_registers - 3) * ++ RegisterImpl::max_slots_per_register * ++ BytesPerInt; ++ } ++}; + -+instruct vaddF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { ++ assert_cond(masm != NULL && total_frame_words != NULL); ++ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ *total_frame_words = frame_size_in_words; + -+instruct vaddD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Save Integer and Float registers. ++ __ enter(); ++ __ push_CPU_state(); + -+// vector and ++ // Set an oopmap for the call site. 
This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. + -+instruct vand(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AndV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vand.vv $dst, $src1, $src2\t#@vand" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vand_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); ++ assert_cond(oop_maps != NULL && oop_map != NULL); + -+// vector or ++ int sp_offset_in_slots = 0; ++ int step_in_slots = 0; + -+instruct vor(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (OrV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vor.vv $dst, $src1, $src2\t#@vor" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vor_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ step_in_slots = FloatRegisterImpl::max_slots_per_register; ++ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { ++ FloatRegister r = as_FloatRegister(i); ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); ++ } + -+// vector xor ++ step_in_slots = RegisterImpl::max_slots_per_register; ++ // skip the slot reserved for alignment, see MacroAssembler::push_reg; ++ // also skip x5 ~ x6 on the stack because they are caller-saved registers. ++ sp_offset_in_slots += RegisterImpl::max_slots_per_register * 3; ++ // besides, we ignore x0 ~ x4 because push_CPU_state won't push them on the stack. ++ for (int i = 7; i < RegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { ++ Register r = as_Register(i); ++ if (r != xthread) { ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots + additional_frame_slots), r->as_VMReg()); ++ } ++ } + -+instruct vxor(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (XorV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vxor_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ return oop_map; ++} + -+// vector float div ++void RegisterSaver::restore_live_registers(MacroAssembler* masm) { ++ assert_cond(masm != NULL); ++ __ pop_CPU_state(); ++ __ leave(); ++} + -+instruct vdivF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (DivVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfdiv_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Is vector's size (in bytes) bigger than a size saved by default? 
++bool SharedRuntime::is_wide_vector(int size) { ++ return false; ++} + -+instruct vdivD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (DivVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfdiv_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++size_t SharedRuntime::trampoline_size() { ++ return 6 * NativeInstruction::instruction_size; ++} + -+// vector integer max/min ++void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, destination, offset); ++ __ jalr(x0, t0, offset); ++} + -+instruct vmax(vReg dst, vReg src1, vReg src2) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && -+ n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); -+ match(Set dst (MaxV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmax.vv $dst, $src1, $src2\t#@vmax" %} -+ ins_encode %{ -+ BasicType bt = Matcher::vector_element_basic_type(this); -+ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -+ __ vsetvli(t0, x0, sew); -+ __ vmax_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and ra ++ // This should really be in_preserve_stack_slots ++ return r->reg2stack() * VMRegImpl::stack_slot_size; ++} + -+instruct vmin(vReg dst, vReg src1, vReg src2) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && -+ n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); -+ match(Set dst (MinV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmin.vv $dst, $src1, $src2\t#@vmin" %} -+ ins_encode %{ -+ BasicType bt = Matcher::vector_element_basic_type(this); -+ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -+ __ vsetvli(t0, x0, sew); -+ __ vmin_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} + -+// vector float-point max/min ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than VMRegImpl::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 64-bit ++// integer registers. 
+ -+instruct vmaxF(vReg dst, vReg src1, vReg src2) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -+ match(Set dst (MaxV src1 src2)); -+ effect(TEMP_DEF dst); -+ ins_cost(VEC_COST); -+ format %{ "vmaxF $dst, $src1, $src2\t#@vmaxF" %} -+ ins_encode %{ -+ __ minmax_FD_v(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -+ false /* is_double */, false /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Note: the INPUTS in sig_bt are in units of Java argument words, ++// which are 64-bit. The OUTPUTS are in 32-bit units. + -+instruct vmaxD(vReg dst, vReg src1, vReg src2) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -+ match(Set dst (MaxV src1 src2)); -+ effect(TEMP_DEF dst); -+ ins_cost(VEC_COST); -+ format %{ "vmaxD $dst, $src1, $src2\t#@vmaxD" %} -+ ins_encode %{ -+ __ minmax_FD_v(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -+ true /* is_double */, false /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// The Java calling convention is a "shifted" version of the C ABI. ++// By skipping the first C ABI register we can call non-static jni ++// methods with small numbers of arguments without having to shuffle ++// the arguments at all. Since we control the java ABI we ought to at ++// least get some advantage out of it. + -+instruct vminF(vReg dst, vReg src1, vReg src2) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -+ match(Set dst (MinV src1 src2)); -+ effect(TEMP_DEF dst); -+ ins_cost(VEC_COST); -+ format %{ "vminF $dst, $src1, $src2\t#@vminF" %} -+ ins_encode %{ -+ __ minmax_FD_v(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -+ false /* is_double */, true /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ // Create the mapping between argument positions and ++ // registers. 
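Editorial note, not part of the patch: the comments above describe how java_calling_convention maps the flattened Java signature onto j_rarg0..7 / j_farg0..7 and 4-byte stack slots. A minimal standalone sketch of that counting logic, using plain labels instead of VMRegPairs and assuming 8 integer and 8 FP argument registers as in the code below:

    #include <cstdio>

    // Simplified model of the register-vs-stack assignment performed by
    // java_calling_convention; register names are just labels here.
    enum Kind { INT_LIKE, LONG, FLOAT_ARG, DOUBLE_ARG, VOID_HALF };

    int main() {
      // Example flattened signature: (long, int, double, Object)
      // -> T_LONG, T_VOID, T_INT, T_DOUBLE, T_VOID, T_OBJECT
      Kind sig[] = { LONG, VOID_HALF, INT_LIKE, DOUBLE_ARG, VOID_HALF, INT_LIKE };
      const int n_int_regs = 8, n_fp_regs = 8;   // j_rarg0..7, j_farg0..7
      int int_args = 0, fp_args = 0, stk_args = 0;

      for (Kind k : sig) {
        switch (k) {
          case VOID_HALF:            // upper half of a long/double: no location
            break;
          case INT_LIKE: case LONG:  // ints, oops and longs share the integer registers
            if (int_args < n_int_regs) { printf("j_rarg%d\n", int_args++); }
            else { printf("stack slot %d\n", stk_args); stk_args += 2; }
            break;
          case FLOAT_ARG: case DOUBLE_ARG:  // floats/doubles use the FP registers first
            if (fp_args < n_fp_regs) { printf("j_farg%d\n", fp_args++); }
            else { printf("stack slot %d\n", stk_args); stk_args += 2; }
            break;
        }
      }
      // The real function returns align_up(stk_args, 2): outgoing stack slots needed.
      printf("outgoing stack slots: %d\n", stk_args);
      return 0;
    }
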
++ static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { ++ j_rarg0, j_rarg1, j_rarg2, j_rarg3, ++ j_rarg4, j_rarg5, j_rarg6, j_rarg7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { ++ j_farg0, j_farg1, j_farg2, j_farg3, ++ j_farg4, j_farg5, j_farg6, j_farg7 ++ }; + -+instruct vminD(vReg dst, vReg src1, vReg src2) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -+ match(Set dst (MinV src1 src2)); -+ effect(TEMP_DEF dst); -+ ins_cost(VEC_COST); -+ format %{ "vminD $dst, $src1, $src2\t#@vminD" %} -+ ins_encode %{ -+ __ minmax_FD_v(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -+ true /* is_double */, true /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time + -+// vector fmla ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_BOOLEAN: // fall through ++ case T_CHAR: // fall through ++ case T_BYTE: // fall through ++ case T_SHORT: // fall through ++ case T_INT: ++ if (int_args < Argument::n_int_register_parameters_j) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_LONG: // fall through ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ case T_OBJECT: // fall through ++ case T_ARRAY: // fall through ++ case T_ADDRESS: ++ if (int_args < Argument::n_int_register_parameters_j) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters_j) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters_j) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ return align_up(stk_args, 2); ++} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Patch the callers callsite with entry to compiled code if it 
exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ assert_cond(masm != NULL); ++ Label L; ++ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); ++ __ beqz(t0, L); + -+// vector fmls ++ __ enter(); ++ __ push_CPU_state(); + -+// dst_src1 = dst_src1 + -src2 * src3 -+// dst_src1 = dst_src1 + src2 * -src3 -+instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // VM needs caller's callsite ++ // VM needs target method ++ // This needs to be a long call since we will relocate this adapter to ++ // the codeBuffer and it may not reach + -+// dst_src1 = dst_src1 + -src2 * src3 -+// dst_src1 = dst_src1 + src2 * -src3 -+instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif + -+// vector fnmla ++ __ mv(c_rarg0, xmethod); ++ __ mv(c_rarg1, ra); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); ++ __ jalr(x1, t0, offset); + -+// dst_src1 = -dst_src1 + -src2 * src3 -+// dst_src1 = -dst_src1 + src2 * -src3 -+instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ pop_CPU_state(); ++ // restore sp ++ __ leave(); ++ __ bind(L); ++} + -+// dst_src1 = -dst_src1 + -src2 * src3 -+// dst_src1 = -dst_src1 + src2 * -src3 -+instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. 
We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ patch_callers_callsite(masm); + -+// vector fnmls ++ __ bind(skip_fixup); + -+// dst_src1 = -dst_src1 + src2 * src3 -+instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ int words_pushed = 0; + -+// dst_src1 = -dst_src1 + src2 * src3 -+instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Since all args are passed on the stack, total_args_passed * ++ // Interpreter::stackElementSize is the space we need. + -+// vector mla ++ int extraspace = total_args_passed * Interpreter::stackElementSize; + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ mv(x30, sp); + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // stack is aligned, keep it that way ++ extraspace = align_up(extraspace, 2 * wordSize); + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ if (extraspace) { ++ __ sub(sp, sp, extraspace); ++ } + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Now write the args into the 
outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); ++ continue; ++ } + -+// vector mls ++ // offset to start parameters ++ int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; ++ int next_off = st_off - Interpreter::stackElementSize; + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Say 4 args: ++ // i st_off ++ // 0 32 T_LONG ++ // 1 24 T_VOID ++ // 2 16 T_OBJECT ++ // 3 8 T_BOOL ++ // - 0 return address ++ // ++ // However to make thing extra confusing. Because we can fit a Java long/double in ++ // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter ++ // leaves one slot empty and only stores to a single slot. In this case the ++ // slot that is occupied is the T_VOID slot. See I said it was confusing. + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use t0 ++ int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size ++ + extraspace ++ + words_pushed * wordSize); ++ if (!r_2->is_valid()) { ++ __ lwu(t0, Address(sp, ld_off)); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++ } else { ++ __ ld(t0, Address(sp, ld_off), /*temp register*/esp); + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG ++ // T_DOUBLE and T_LONG use two slots in the interpreter ++ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { ++ // ld_off == LSW, ld_off+wordSize == MSW ++ // st_off == MSW, next_off == LSW ++ __ sd(t0, Address(sp, next_off), /*temp register*/esp); ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ li(t0, 0xdeadffffdeadaaaaul); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++#endif /* ASSERT */ ++ } else { ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++ } ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ // must be only an int (or less ) so move only 32bits to slot ++ __ sd(r, Address(sp, st_off)); ++ } else { ++ // Two VMREgs|OptoRegs can be T_OBJECT, 
T_ADDRESS, T_DOUBLE, T_LONG ++ // T_DOUBLE and T_LONG use two slots in the interpreter ++ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { ++ // long/double in gpr ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ li(t0, 0xdeadffffdeadaaabul); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++#endif /* ASSERT */ ++ __ sd(r, Address(sp, next_off)); ++ } else { ++ __ sd(r, Address(sp, st_off)); ++ } ++ } ++ } else { ++ assert(r_1->is_FloatRegister(), ""); ++ if (!r_2->is_valid()) { ++ // only a float use just part of the slot ++ __ fsw(r_1->as_FloatRegister(), Address(sp, st_off)); ++ } else { ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ li(t0, 0xdeadffffdeadaaacul); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++#endif /* ASSERT */ ++ __ fsd(r_1->as_FloatRegister(), Address(sp, next_off)); ++ } ++ } ++ } + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ mv(esp, sp); // Interp expects args on caller's expression stack + -+// vector mul ++ __ ld(t0, Address(xmethod, in_bytes(Method::interpreter_entry_offset()))); ++ __ jr(t0); ++} + -+instruct vmulB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ // Cut-out for having no stack args. ++ int comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord; ++ if (comp_args_on_stack != 0) { ++ __ sub(t0, sp, comp_words_on_stack * wordSize); ++ __ andi(sp, t0, -16); ++ } + -+instruct vmulS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_offset()))); + -+instruct vmulI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Now generate the shuffle code. 
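Editorial note, not part of the patch: the offset arithmetic used by gen_c2i_adapter above can be checked in isolation. A small standalone sketch, assuming Interpreter::stackElementSize is one 8-byte word (true on 64-bit builds), showing st_off/next_off for the four-argument example discussed in the comment and that the 64-bit value of a long/double lands in next_off (the slot belonging to the trailing T_VOID):

    #include <cstdio>

    int main() {
      const int stackElementSize = 8;     // assumed value of Interpreter::stackElementSize
      const int total_args_passed = 4;
      const char* sig[] = { "T_LONG", "T_VOID", "T_OBJECT", "T_BOOL" };

      for (int i = 0; i < total_args_passed; i++) {
        // Same formulas as the adapter code above.
        int st_off   = (total_args_passed - i - 1) * stackElementSize;
        int next_off = st_off - stackElementSize;
        printf("%-9s st_off=%2d next_off=%2d\n", sig[i], st_off, next_off);
      }
      // For T_LONG at i=0 the value is stored at next_off, while the st_off slot is
      // left unused (filled with 0xdeadffffdeadaaaa junk in ASSERT builds).
      return 0;
    }
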
++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); ++ continue; ++ } + -+instruct vmulL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Pick up 0, 1 or 2 words from SP+offset. + -+instruct vmulF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), ++ "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; + -+instruct vmulD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to account for return address ) ++ int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size; ++ if (!r_2->is_valid()) { ++ __ lw(t0, Address(esp, ld_off)); ++ __ sd(t0, Address(sp, st_off), /*temp register*/t2); ++ } else { ++ // ++ // We are using two optoregs. This can be either T_OBJECT, ++ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates ++ // two slots but only uses one for thr T_LONG or T_DOUBLE case ++ // So we must adjust where to pick up the data to match the ++ // interpreter. ++ // ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address + -+// vector fneg ++ // ld_off is MSW so get LSW ++ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? ++ next_off : ld_off; ++ __ ld(t0, Address(esp, offset)); ++ // st_off is LSW (i.e. reg.first()) ++ __ sd(t0, Address(sp, st_off), /*temp register*/t2); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // ++ // We are using two VMRegs. This can be either T_OBJECT, ++ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates ++ // two slots but only uses one for thr T_LONG or T_DOUBLE case ++ // So we must adjust where to pick up the data to match the ++ // interpreter. 
+ -+instruct vnegF(vReg dst, vReg src) %{ -+ match(Set dst (NegVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? ++ next_off : ld_off; + -+instruct vnegD(vReg dst, vReg src) %{ -+ match(Set dst (NegVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // this can be a misaligned move ++ __ ld(r, Address(esp, offset)); ++ } else { ++ // sign extend and use a full word? ++ __ lw(r, Address(esp, ld_off)); ++ } ++ } else { ++ if (!r_2->is_valid()) { ++ __ flw(r_1->as_FloatRegister(), Address(esp, ld_off)); ++ } else { ++ __ fld(r_1->as_FloatRegister(), Address(esp, next_off)); ++ } ++ } ++ } + -+// popcount vector ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. + -+instruct vpopcountI(iRegINoSp dst, vReg src) %{ -+ match(Set dst (PopCountVI src)); -+ format %{ "vpopc.m $dst, $src\t#@vpopcountI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset())); + -+// vector add reduction ++ __ jr(t1); ++} + -+instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + -+instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addS\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} -+ ins_encode %{ 
-+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; + -+instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ Label ok; + -+instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (AddReductionVL src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ const Register holder = t1; ++ const Register receiver = j_rarg0; ++ const Register tmp = t2; // A call-clobbered register not used for arg passing + -+instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{ -+ match(Set src1_dst (AddReductionVF src1_dst src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t" -+ "vfredosum.vs $tmp, $src2, $tmp\n\t" -+ "vfmv.f.s $src1_dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -+ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know xmethod holds the Method* during calls ++ // to the interpreter. The args start out packed in the compiled layout. They ++ // need to be unpacked into the interpreter layout. This will almost always ++ // require some stack space. We grow the current (compiled) stack, then repack ++ // the args. We finally end in a jump to the generic interpreter entry point. ++ // On exit from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). 
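Editorial note, not part of the patch: generate_i2c2i_adapters produces one adapter blob with three entry points, and which one a caller lands on depends on the direction of the call. A simplified picture with illustrative names only:

    #include <cstdio>

    // Roles of the three entry points returned via AdapterHandlerLibrary::new_entry.
    struct AdapterEntryPoints {
      const char* i2c_entry;            // interpreted caller -> compiled callee:
                                        // shuffle args from the interpreter's
                                        // expression stack into the compiled layout
      const char* c2i_entry;            // compiled caller -> interpreted callee:
                                        // unpack register/stack args onto the
                                        // interpreter's expression stack
      const char* c2i_unverified_entry; // like c2i_entry, but first checks the
                                        // receiver klass against the inline-cache
                                        // holder and bails to the ic_miss stub
    };

    int main() {
      AdapterEntryPoints e = { "i2c", "c2i", "c2i_unverified" };
      printf("%s / %s / %s\n", e.i2c_entry, e.c2i_entry, e.c2i_unverified_entry);
      return 0;
    }
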
+ -+instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{ -+ match(Set src1_dst (AddReductionVD src1_dst src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t" -+ "vfredosum.vs $tmp, $src2, $tmp\n\t" -+ "vfmv.f.s $src1_dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -+ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ { ++ __ block_comment("c2i_unverified_entry {"); ++ __ load_klass(t0, receiver); ++ __ ld(tmp, Address(holder, CompiledICHolder::holder_klass_offset())); ++ __ ld(xmethod, Address(holder, CompiledICHolder::holder_metadata_offset())); ++ __ beq(t0, tmp, ok); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + -+// vector integer max reduction -+instruct vreduce_maxB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (MaxReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_maxB $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ Label Ldone; -+ __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vreduce_maxS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -+ match(Set dst (MaxReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_maxS $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ Label Ldone; -+ __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (MaxReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_maxI $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (MaxReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_maxL $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ 
vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+// vector integer min reduction -+instruct vreduce_minB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (MinReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_minB $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ Label Ldone; -+ __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vreduce_minS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -+ match(Set dst (MinReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_minS $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ Label Ldone; -+ __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (MinReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_minI $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (MinReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_minL $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ bind(ok); ++ // Method might have been compiled since the call site was patched to ++ // interpreted; if that is the case treat it as a miss so we can get ++ // the call site corrected. 
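Editorial note, not part of the patch: the c2i_unverified_entry logic around this point reduces to two checks before falling into the verified c2i path. A standalone restatement in plain C++ (types and names are illustrative; the real code compares Klass* values loaded from the receiver and the CompiledICHolder, and reads Method::code()):

    #include <cstdio>

    struct ICHolder { const void* holder_klass; const void* holder_metadata; };

    enum class Target { IC_MISS_STUB, C2I_ENTRY };

    Target c2i_unverified(const void* receiver_klass, const ICHolder& holder,
                          bool callee_has_compiled_code) {
      if (receiver_klass != holder.holder_klass) {
        return Target::IC_MISS_STUB;   // wrong receiver type: let the IC be repatched
      }
      if (callee_has_compiled_code) {
        return Target::IC_MISS_STUB;   // method got compiled meanwhile: correct the call site
      }
      return Target::C2I_ENTRY;        // fall through into the real c2i adapter
    }

    int main() {
      int k1 = 0, k2 = 0;
      ICHolder h = { &k1, nullptr };
      printf("%d\n", (int)c2i_unverified(&k2, h, false)); // mismatch -> IC_MISS_STUB
      printf("%d\n", (int)c2i_unverified(&k1, h, false)); // match, still interpreted -> C2I_ENTRY
      return 0;
    }
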
++ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); ++ __ beqz(t0, skip_fixup); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ __ block_comment("} c2i_unverified_entry"); ++ } + -+// vector float max reduction ++ address c2i_entry = __ pc(); + -+instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -+ match(Set dst (MaxReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -+ format %{ "reduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %} -+ ins_encode %{ -+ __ reduce_minmax_FD_v($dst$$FloatRegister, -+ $src1$$FloatRegister, as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -+ false /* is_double */, false /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + -+instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -+ match(Set dst (MaxReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -+ format %{ "reduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %} -+ ins_encode %{ -+ __ reduce_minmax_FD_v($dst$$FloatRegister, -+ $src1$$FloatRegister, as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -+ true /* is_double */, false /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); ++} + -+// vector float min reduction ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on riscv"); + -+instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -+ match(Set dst (MinReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -+ format %{ "reduce_minF $dst, $src1, $src2, $tmp1, $tmp2" %} -+ ins_encode %{ -+ __ reduce_minmax_FD_v($dst$$FloatRegister, -+ $src1$$FloatRegister, as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -+ false /* is_double */, true /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. 
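Editorial note, not part of the patch: a quick way to read the comment above is that c_calling_convention only has to report how many 4-byte outgoing stack slots the overflow arguments need. A minimal counting sketch, assuming 8 integer C argument registers (c_rarg0..7) and ten pointer-sized arguments; any final alignment of the returned value is ignored here:

    #include <cstdio>

    int main() {
      const int n_int_register_parameters_c = 8;
      int int_args = 0, stk_args = 0;

      const int total_args_passed = 10;      // e.g. ten pointer-sized native arguments
      for (int i = 0; i < total_args_passed; i++) {
        if (int_args < n_int_register_parameters_c) {
          int_args++;                        // goes in c_rarg0..c_rarg7
        } else {
          stk_args += 2;                     // overflows: two VMRegImpl slots per argument
        }
      }
      printf("register args: %d, stack slots reserved: %d\n", int_args, stk_args);
      // -> register args: 8, stack slots reserved: 4
      return 0;
    }
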
+ -+instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -+ match(Set dst (MinReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -+ format %{ "reduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %} -+ ins_encode %{ -+ __ reduce_minmax_FD_v($dst$$FloatRegister, -+ $src1$$FloatRegister, as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -+ true /* is_double */, true /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { ++ c_rarg0, c_rarg1, c_rarg2, c_rarg3, ++ c_rarg4, c_rarg5, c_rarg6, c_rarg7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { ++ c_farg0, c_farg1, c_farg2, c_farg3, ++ c_farg4, c_farg5, c_farg6, c_farg7 ++ }; + -+// vector Math.rint, floor, ceil ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time + -+instruct vroundD(vReg dst, vReg src, immI rmode) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -+ match(Set dst (RoundDoubleModeV src rmode)); -+ format %{ "vroundD $dst, $src, $rmode" %} -+ ins_encode %{ -+ switch ($rmode$$constant) { -+ case RoundDoubleModeNode::rmode_rint: -+ __ csrwi(CSR_FRM, C2_MacroAssembler::rne); -+ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_BOOLEAN: // fall through ++ case T_CHAR: // fall through ++ case T_BYTE: // fall through ++ case T_SHORT: // fall through ++ case T_INT: ++ if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: // fall through ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ case T_OBJECT: // fall through ++ case T_ARRAY: // fall through ++ case T_ADDRESS: // fall through ++ case T_METADATA: ++ if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters_c) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } + break; -+ case RoundDoubleModeNode::rmode_floor: -+ __ csrwi(CSR_FRM, C2_MacroAssembler::rdn); -+ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ case T_DOUBLE: ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters_c) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } + break; -+ case RoundDoubleModeNode::rmode_ceil: -+ __ csrwi(CSR_FRM, C2_MacroAssembler::rup); -+ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && 
(sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); + break; + default: + ShouldNotReachHere(); -+ break; + } -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+// vector replicate -+ -+instruct replicateB(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateB src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct replicateS(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateS src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct replicateI(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateI src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct replicateL(vReg dst, iRegL src) %{ -+ match(Set dst (ReplicateL src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct replicateB_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateB con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct replicateS_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateS con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct replicateI_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateI con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateI_imm5" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct replicateL_imm5(vReg dst, immL5 con) %{ -+ match(Set dst (ReplicateL con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateL_imm5" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct replicateF(vReg dst, fRegF src) %{ -+ match(Set dst (ReplicateF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.v.f $dst, $src\t#@replicateF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct replicateD(vReg dst, fRegD src) %{ -+ match(Set dst (ReplicateD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.v.f $dst, $src\t#@replicateD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+// vector shift -+ -+instruct vasrB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVB src shift)); -+ 
ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t" -+ "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ BitsPerByte - 1, Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vasrS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t" -+ "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ BitsPerShort - 1, Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vasrI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vasrL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vlslB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect( TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vlslS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > 
BitsPerShort - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ } + -+instruct vlslI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ return stk_args; ++} + -+instruct vlslL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// On 64 bit we will store integer like items to the stack as ++// 64 bits items (riscv64 abi) even though java would only store ++// 32bits for a parameter. On 32bit it will simply be 32 bits ++// So this routine will do 32->32 on 32bit and 32->64 on 64bit ++static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ __ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ // 32bits extend sign ++ __ addw(dst.first()->as_Register(), src.first()->as_Register(), zr); ++ } ++ } ++} + -+instruct vlsrB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ assert_cond(masm != NULL && map != NULL && receiver_offset != NULL); ++ // must pass a handle. First figure out the location we use as a handle ++ Register rHandle = dst.first()->is_stack() ? 
t1 : dst.first()->as_Register(); + -+instruct vlsrS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // See if oop is NULL if it is we need no handle + ++ if (src.first()->is_stack()) { + -+instruct vlsrI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Oop is already on the stack as an argument ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } + ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ la(rHandle, Address(fp, reg2offset_in(src.first()))); ++ // conditionally move a NULL ++ Label notZero1; ++ __ bnez(t0, notZero1); ++ __ mv(rHandle, zr); ++ __ bind(notZero1); ++ } else { + -+instruct vlsrL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles and pass a handle if oop is non-NULL + -+instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVB src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++ const Register rOop = src.first()->as_Register(); ++ int oop_slot = -1; ++ if (rOop == j_rarg0) { ++ oop_slot = 0; ++ } else if (rOop == j_rarg1) { ++ oop_slot = 1; ++ } else if (rOop == j_rarg2) { ++ oop_slot = 2; ++ } else if (rOop == j_rarg3) { ++ oop_slot = 3; ++ } else if (rOop == j_rarg4) { ++ oop_slot = 4; ++ } else if (rOop == j_rarg5) { ++ oop_slot = 5; ++ } else if (rOop == j_rarg6) { ++ oop_slot = 6; ++ } else { ++ assert(rOop == j_rarg7, "wrong register"); ++ oop_slot = 7; + } -+ if (con >= BitsPerByte) con = BitsPerByte - 1; -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} + 
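++    // Turn the argument register's slot index into its position in the oop
++    // handle area and the matching byte offset from sp used below.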
-+instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVS src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerShort) con = BitsPerShort - 1; -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot * VMRegImpl::stack_slot_size; + -+instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVI src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ // Store oop in handle area, may be NULL ++ __ sd(rOop, Address(sp, offset)); ++ if (is_receiver) { ++ *receiver_offset = offset; + } -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ -+ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); -+ match(Set dst (RShiftVL src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrL_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++ //rOop maybe the same as rHandle ++ if (rOop == rHandle) { ++ Label isZero; ++ __ beqz(rOop, isZero); ++ __ la(rHandle, Address(sp, offset)); ++ __ bind(isZero); ++ } else { ++ Label notZero2; ++ __ la(rHandle, Address(sp, offset)); ++ __ bnez(rOop, notZero2); ++ __ mv(rHandle, zr); ++ __ bind(notZero2); + } -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ } + -+instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVB src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++ // If arg is on the stack then place it otherwise it is already in correct reg. 
++ if (dst.first()->is_stack()) { ++ __ sd(rHandle, Address(sp, reg2offset_out(dst.first()))); ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(src.first()->is_stack() && dst.first()->is_stack() || ++ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ lwu(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sw(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else if (dst.first()->is_Register()) { ++ __ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } else { ++ ShouldNotReachHere(); + } -+ if (con >= BitsPerByte) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++ } else if (src.first() != dst.first()) { ++ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { ++ __ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ ShouldNotReachHere(); + } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ } ++} + -+instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVS src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); + } -+ if (con >= BitsPerShort) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ __ mv(dst.first()->as_Register(), src.first()->as_Register()); + } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ } ++} + -+instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVI src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(src.first()->is_stack() && dst.first()->is_stack() || ++ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ 
ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else if (dst.first()-> is_Register()) { ++ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } else { ++ ShouldNotReachHere(); + } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ -+ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); -+ match(Set dst (URShiftVL src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++ } else if (src.first() != dst.first()) { ++ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { ++ __ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ ShouldNotReachHere(); + } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ } ++} + -+instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVB src (LShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con >= BitsPerByte) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ assert_cond(masm != NULL); ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ fsw(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_DOUBLE: ++ __ fsd(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_VOID: break; ++ default: { ++ __ sd(x10, Address(fp, -3 * wordSize)); + } -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ } ++} + -+instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVS src (LShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con >= BitsPerShort) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ assert_cond(masm != NULL); ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ flw(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_DOUBLE: ++ __ fld(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_VOID: break; ++ default: { ++ __ ld(x10, Address(fp, -3 * wordSize)); + } -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ } ++} + -+instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVI src (LShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ 
"vsll.vi $dst, $src, $shift\t#@vlslI_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ -+ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); -+ match(Set dst (LShiftVL src (LShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ assert_cond(masm != NULL && args != NULL); ++ RegSet x; ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ x = x + args[i].first()->as_Register(); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ addi(sp, sp, -2 * wordSize); ++ __ fsd(args[i].first()->as_FloatRegister(), Address(sp, 0)); ++ } ++ } ++ __ push_reg(x, sp); ++} + -+instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ assert_cond(masm != NULL && args != NULL); ++ RegSet x; ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ x = x + args[i].first()->as_Register(); ++ } else { ++ ; ++ } ++ } ++ __ pop_reg(x, sp); ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ ; ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ fld(args[i].first()->as_FloatRegister(), Address(sp, 0)); ++ __ add(sp, sp, 2 * wordSize); ++ } ++ } ++} + -+instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || -+ n->bottom_type()->is_vect()->element_basic_type() == T_CHAR); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static void rt_call(MacroAssembler* masm, address dest) { ++ assert_cond(masm != NULL); ++ CodeBlob *cb = CodeCache::find_blob(dest); ++ if (cb) { ++ __ far_call(RuntimeAddress(dest)); ++ } else { ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(dest), offset); ++ __ jalr(x1, t0, offset); ++ } ++} + -+instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static void verify_oop_args(MacroAssembler* masm, ++ const methodHandle& method, ++ const BasicType* sig_bt, 
++ const VMRegPair* regs) { ++ const Register temp_reg = x9; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} + -+instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static void gen_special_dispatch(MacroAssembler* masm, ++ const methodHandle& method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); + -+// vector sqrt ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = x9; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", iid); ++ } + -+instruct vsqrtF(vReg dst, vReg src) %{ -+ match(Set dst (SqrtVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. ++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } + -+instruct vsqrtD(vReg dst, vReg src) %{ -+ match(Set dst (SqrtVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. 
++ fatal("receiver always in a register"); ++ receiver_reg = x12; // known to be free at this point ++ __ ld(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } + -+// vector sub ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} + -+instruct vsubB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++// ++// Critical native functions are a shorthand for the use of ++// GetPrimtiveArrayCritical and disallow the use of any other JNI ++// functions. The wrapper is expected to unpack the arguments before ++// passing them to the callee and perform checks before and after the ++// native call to ensure that they GCLocker ++// lock_critical/unlock_critical semantics are followed. Some other ++// parts of JNI setup are skipped like the tear down of the JNI handle ++// block and the check for pending exceptions it's impossible for them ++// to be thrown. ++// ++// They are roughly structured like this: ++// if (GCLocker::needs_gc()) SharedRuntime::block_for_jni_critical() ++// tranistion to thread_in_native ++// unpack arrray arguments and call native entry point ++// check for safepoint in progress ++// check if any thread suspend flags are set ++// call into JVM and possible unlock the JNI critical ++// if a GC was suppressed while in the critical native. 
++// transition back to thread_in_Java ++// return to caller ++// ++nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; + -+instruct vsubS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // First instruction must be a nop as it may need to be patched on deoptimisation ++ __ nop(); ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ address native_func = method->native_function(); ++ assert(native_func != NULL, "must have function"); + -+instruct vsubI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // An OopMap for lock (and class if static) ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ intptr_t start = (intptr_t)__ pc(); + -+instruct vsubL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) + -+instruct vsubF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfsub.vv $dst, $src1, $src2\t@vsubF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args + (method->is_static() ? 
2 : 1); + -+instruct vsubD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; + -+instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, vReg_V1 v1, -+ vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) -+%{ -+ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); ++ int argc = 0; ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} -+ ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. -+ __ string_equals_v($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 1); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ for (int i = 0; i < total_in_args ; i++) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } + -+instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, vReg_V1 v1, -+ vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) -+%{ -+ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); ++ // Now figure out where the args must be stored and how much stack space ++ // they require. ++ int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} -+ ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. -+ __ string_equals_v($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 2); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // incoming registers + -+instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) -+%{ -+ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (AryEq ary1 ary2)); -+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); ++ // Calculate the total number of stack slots we will need. 
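++  // The total is accumulated step by step below: the outgoing C arguments,
++  // the oop handle area for the incoming argument registers, an optional
++  // klass slot (static methods), an optional lock box (synchronized methods),
++  // and a few slots for saved fp/ra and shuffle temporaries, rounded up to
++  // the stack alignment.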
+ -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} -+ ins_encode %{ -+ __ arrays_equals_v($ary1$$Register, $ary2$$Register, -+ $result$$Register, $tmp$$Register, 1); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + -+instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) -+%{ -+ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (AryEq ary1 ary2)); -+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} -+ ins_encode %{ -+ __ arrays_equals_v($ary1$$Register, $ary2$$Register, -+ $result$$Register, $tmp$$Register, 2); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; + -+instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++ // Now any space we need for handlizing a klass if static method + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} -+ ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::UU); -+ %} -+ ins_pipe(pipe_class_memory); -+%} -+instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} -+ ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::LL); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } + -+instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++ // Plus a lock if needed + -+ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} -+ ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::UL); -+ %} -+ ins_pipe(pipe_class_memory); -+%} -+instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} -+ ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::LU); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ // Now a place (+2) to save return values or temp during shuffling ++ // + 4 for return address (which we own) and saved fp ++ stack_slots += 6; + -+// fast byte[] to char[] inflation -+instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set dummy 
(StrInflatedCopy src (Binary dst len))); -+ effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // | 2 slots (ra) | ++ // | 2 slots (fp) | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset (8 java arg registers) ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // + -+ format %{ "String Inflate $src,$dst" %} -+ ins_encode %{ -+ __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); -+ %} -+ ins_pipe(pipe_class_memory); -+%} + -+// encode char[] to byte[] in ISO_8859_1 -+instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (EncodeISOArray src (Binary dst len))); -+ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -+ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = align_up(stack_slots, StackAlignmentInSlots); + -+ format %{ "Encode array $src,$dst,$len -> $result" %} -+ ins_encode %{ -+ __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register, -+ $result$$Register, $tmp$$Register); -+ %} -+ ins_pipe( pipe_class_memory ); -+%} ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; + -+// fast char[] to byte[] compression -+instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (StrCompressedCopy src (Binary dst len))); -+ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -+ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); ++ // First thing make an ic check to see if we should even be here + -+ format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %} -+ ins_encode %{ -+ __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register, -+ $result$$Register, $tmp$$Register); -+ %} -+ ins_pipe( pipe_slow ); -+%} ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. 
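++
++  // The inline cache check that follows compares the receiver's klass
++  // (receiver in j_rarg0) against the expected klass passed in ic_reg (t1)
++  // and jumps to the IC miss stub on a mismatch; execution falls through to
++  // the verified entry point only when they match.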
+ -+instruct vcount_positives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (CountPositives ary len)); -+ effect(USE_KILL ary, USE_KILL len, TEMP tmp); + -+ format %{ "count positives byte[] $ary, $len -> $result" %} -+ ins_encode %{ -+ __ count_positives_v($ary$$Register, $len$$Register, $result$$Register, $tmp$$Register); -+ %} ++ const Register ic_reg = t1; ++ const Register receiver = j_rarg0; + -+ ins_pipe(pipe_slow); -+%} ++ Label hit; ++ Label exception_pending; + -+instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) -+%{ -+ predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, -+ TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); ++ assert_different_registers(ic_reg, receiver, t0); ++ __ verify_oop(receiver); ++ __ cmp_klass(receiver, ic_reg, t0, hit); + -+ format %{ "StringUTF16 IndexOf char[] $str1, $cnt1, $ch -> $result" %} ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + -+ ins_encode %{ -+ __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ false /* isL */); -+ %} ++ // Verified entry point must be aligned ++ __ align(8); + -+ ins_pipe(pipe_class_memory); -+%} ++ __ bind(hit); + -+instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) -+%{ -+ predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, -+ TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); ++ int vep_offset = ((intptr_t)__ pc()) - start; + -+ format %{ "StringLatin1 IndexOf char[] $str1, $cnt1, $ch -> $result" %} ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. ++ __ nop(); + -+ ins_encode %{ -+ __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ true /* isL */); -+ %} ++ // Generate stack overflow check ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); + -+ ins_pipe(pipe_class_memory); -+%} ++ // Generate a new frame for the wrapper. ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ sub(sp, sp, stack_size - 2 * wordSize); + -+// clearing of an array -+instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, -+ vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3) -+%{ -+ predicate(UseRVV); -+ match(Set dummy (ClearArray cnt base)); -+ effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3); ++ // Frame is now completed as far as size and linkage. 
++ int frame_complete = ((intptr_t)__ pc()) - start; + -+ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} ++ // We use x18 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native + -+ ins_encode %{ -+ __ clear_array_v($base$$Register, $cnt$$Register); -+ %} ++ const Register oop_handle_reg = x18; + -+ ins_pipe(pipe_class_memory); -+%} -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -new file mode 100644 -index 00000000000..f85d4b25a76 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -0,0 +1,2761 @@ -+/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmti, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "code/debugInfoRec.hpp" -+#include "code/icBuffer.hpp" -+#include "code/vtableStubs.hpp" -+#include "compiler/oopMap.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "interpreter/interpreter.hpp" -+#include "logging/log.hpp" -+#include "memory/resourceArea.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/compiledICHolder.hpp" -+#include "oops/klass.inline.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/jniHandles.hpp" -+#include "runtime/safepointMechanism.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/signature.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/vframeArray.hpp" -+#include "utilities/align.hpp" -+#include "utilities/formatBuffer.hpp" -+#include "vmreg_riscv.inline.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_Runtime1.hpp" -+#endif -+#ifdef COMPILER2 -+#include "adfiles/ad_riscv.hpp" -+#include "opto/runtime.hpp" -+#endif ++ // ----------------- ++ // The Grand Shuffle + -+#define __ masm-> ++ // The Java calling convention is either equal (linux) or denser (win64) than the ++ // c calling convention. However the because of the jni_env argument the c calling ++ // convention always has at least one more (and two for static) arguments than Java. 
++ // Therefore if we move the args from java -> c backwards then we will never have ++ // a register->register conflict and we don't have to build a dependency graph ++ // and figure out how to break any cycles. ++ // + -+const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; -+ -+class SimpleRuntimeFrame { -+public: ++ // Record esp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; + -+ // Most of the runtime stubs have this simple frame layout. -+ // This class exists to make the layout shared in one place. -+ // Offsets are for compiler stack slots, which are jints. -+ enum layout { -+ // The frame sender code expects that fp will be in the "natural" place and -+ // will override any oopMap setting for it. We must therefore force the layout -+ // so that it agrees with the frame sender code. -+ // we don't expect any arg reg save area so riscv asserts that -+ // frame::arg_reg_save_area_bytes == 0 -+ fp_off = 0, fp_off2, -+ return_off, return_off2, -+ framesize -+ }; -+}; ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ assert_cond(map != NULL); + -+class RegisterSaver { -+ const bool _save_vectors; -+ public: -+ RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {} -+ ~RegisterSaver() {} -+ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); -+ void restore_live_registers(MacroAssembler* masm); ++ int float_args = 0; ++ int int_args = 0; + -+ // Offsets into the register save area -+ // Used by deoptimization when it is managing result register -+ // values on its own -+ // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) -+ // |---v0---|<---SP -+ // |---v1---|save vectors only in generate_handler_blob -+ // |-- .. --| -+ // |---v31--|----- -+ // |---f0---| -+ // |---f1---| -+ // | .. | -+ // |---f31--| -+ // |---reserved slot for stack alignment---| -+ // |---x5---| -+ // | x6 | -+ // |---.. 
--| -+ // |---x31--| -+ // |---fp---| -+ // |---ra---| -+ int v0_offset_in_bytes(void) { return 0; } -+ int f0_offset_in_bytes(void) { -+ int f0_offset = 0; -+#ifdef COMPILER2 -+ if (_save_vectors) { -+ f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers * -+ BytesPerInt; -+ } -+#endif -+ return f0_offset; ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; + } -+ int reserved_slot_offset_in_bytes(void) { -+ return f0_offset_in_bytes() + -+ FloatRegisterImpl::max_slots_per_register * -+ FloatRegisterImpl::number_of_registers * -+ BytesPerInt; ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; + } + -+ int reg_offset_in_bytes(Register r) { -+ assert (r->encoding() > 4, "ra, sp, gp and tp not saved"); -+ return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize; -+ } ++#endif /* ASSERT */ + -+ int freg_offset_in_bytes(FloatRegister f) { -+ return f0_offset_in_bytes() + f->encoding() * wordSize; -+ } ++ // For JNI natives the incoming and outgoing registers are offset upwards. ++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(x9->as_VMReg()); + -+ int ra_offset_in_bytes(void) { -+ return reserved_slot_offset_in_bytes() + -+ (RegisterImpl::number_of_registers - 3) * -+ RegisterImpl::max_slots_per_register * -+ BytesPerInt; ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); + } -+}; + -+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { -+ int vector_size_in_bytes = 0; -+ int vector_size_in_slots = 0; -+#ifdef COMPILER2 -+ if (_save_vectors) { -+ vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE); -+ vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT); -+ } -+#endif ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("mv %d -> %d", i, c_arg)); ++ assert(c_arg != -1 && i != -1, "wrong order"); ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ int_args++; ++ break; ++ case T_VOID: ++ break; + -+ assert_cond(masm != NULL && total_frame_words != NULL); -+ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); -+ // OopMap frame size is in compiler stack slots (jint's) not bytes or words -+ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; -+ // 
The caller will allocate additional_frame_words -+ int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt; -+ // CodeBlob frame size is in words. -+ int frame_size_in_words = frame_size_in_bytes / wordSize; -+ *total_frame_words = frame_size_in_words; ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ float_args++; ++ break; + -+ // Save Integer, Float and Vector registers. -+ __ enter(); -+ __ push_CPU_state(_save_vectors, vector_size_in_bytes); ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg + 1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ float_args++; ++ break; + -+ // Set an oopmap for the call site. This oopmap will map all -+ // oop-registers and debug-info registers as callee-saved. This -+ // will allow deoptimization at this safepoint to find all possible -+ // debug-info recordings, as well as let GC find all oops. ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ int_args++; ++ break; + -+ OopMapSet *oop_maps = new OopMapSet(); -+ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); -+ assert_cond(oop_maps != NULL && oop_map != NULL); ++ case T_ADDRESS: ++ assert(false, "found T_ADDRESS in java args"); ++ break; + -+ int sp_offset_in_slots = 0; -+ int step_in_slots = 0; -+ if (_save_vectors) { -+ step_in_slots = vector_size_in_slots; -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -+ VectorRegister r = as_VectorRegister(i); -+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); ++ default: ++ move32_64(masm, in_regs[i], out_regs[c_arg]); ++ int_args++; + } + } + -+ step_in_slots = FloatRegisterImpl::max_slots_per_register; -+ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -+ FloatRegister r = as_FloatRegister(i); -+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ int c_arg = total_c_args - total_in_args; ++ ++ // Pre-load a static method's oop into c_rarg1. ++ if (method->is_static()) { ++ ++ // load oop into a register ++ __ movoop(c_rarg1, ++ JNIHandles::make_local(method->method_holder()->java_mirror()), ++ /*immediate*/true); ++ ++ // Now handlize the static class mirror it's known not-null. ++ __ sd(c_rarg1, Address(sp, klass_offset)); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ la(c_rarg1, Address(sp, klass_offset)); ++ // and protect the arg if we must spill ++ c_arg--; + } + -+ step_in_slots = RegisterImpl::max_slots_per_register; -+ // skip the slot reserved for alignment, see MacroAssembler::push_reg; -+ // also skip x5 ~ x6 on the stack because they are caller-saved registers. -+ sp_offset_in_slots += RegisterImpl::max_slots_per_register * 3; -+ // besides, we ignore x0 ~ x4 because push_CPU_state won't push them on the stack. -+ for (int i = 7; i < RegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -+ Register r = as_Register(i); -+ if (r != xthread) { -+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots + additional_frame_slots), r->as_VMReg()); -+ } ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a stack traversal). 
++ // We use the same pc/oopMap repeatedly when we call out ++ ++ Label native_return; ++ __ set_last_Java_frame(sp, noreg, native_return, t0); ++ ++ Label dtrace_method_entry, dtrace_method_entry_done; ++ { ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); ++ __ lbu(t0, Address(t0, offset)); ++ __ addw(t0, t0, zr); ++ __ bnez(t0, dtrace_method_entry); ++ __ bind(dtrace_method_entry_done); + } + -+ return oop_map; -+} ++ // RedefineClasses() tracing support for obsolete method entry ++ if (log_is_enabled(Trace, redefine, class, obsolete)) { ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), ++ xthread, c_rarg1); ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } + -+void RegisterSaver::restore_live_registers(MacroAssembler* masm) { -+ assert_cond(masm != NULL); -+#ifdef COMPILER2 -+ __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE)); -+#else -+ __ pop_CPU_state(_save_vectors); -+#endif -+ __ leave(); -+} ++ // Lock a synchronized method + -+// Is vector's size (in bytes) bigger than a size saved by default? -+// riscv does not ovlerlay the floating-point registers on vector registers like aarch64. -+bool SharedRuntime::is_wide_vector(int size) { -+ return UseRVV; -+} ++ // Register definitions used by locking and unlocking + -+// The java_calling_convention describes stack locations as ideal slots on -+// a frame with no abi restrictions. Since we must observe abi restrictions -+// (like the placement of the register window) the slots must be biased by -+// the following value. -+static int reg2offset_in(VMReg r) { -+ // Account for saved fp and ra -+ // This should really be in_preserve_stack_slots -+ return r->reg2stack() * VMRegImpl::stack_slot_size; -+} ++ const Register swap_reg = x10; ++ const Register obj_reg = x9; // Will contain the oop ++ const Register lock_reg = x30; // Address of compiler lock object (BasicLock) ++ const Register old_hdr = x30; // value of old header at unlock time ++ const Register tmp = ra; + -+static int reg2offset_out(VMReg r) { -+ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; -+} ++ Label slow_path_lock; ++ Label lock_done; + -+// --------------------------------------------------------------------------- -+// Read the array of BasicTypes from a signature, and compute where the -+// arguments should go. Values in the VMRegPair regs array refer to 4-byte -+// quantities. Values less than VMRegImpl::stack0 are registers, those above -+// refer to 4-byte stack slots. All stack slots are based off of the stack pointer -+// as framesizes are fixed. -+// VMRegImpl::stack0 refers to the first slot 0(sp). -+// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register -+// up to RegisterImpl::number_of_registers) are the 64-bit -+// integer registers. ++ if (method->is_synchronized()) { + -+// Note: the INPUTS in sig_bt are in units of Java argument words, -+// which are 64-bit. The OUTPUTS are in 32-bit units. ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + -+// The Java calling convention is a "shifted" version of the C ABI. -+// By skipping the first C ABI register we can call non-static jni -+// methods with small numbers of arguments without having to shuffle -+// the arguments at all. 
Since we control the java ABI we ought to at -+// least get some advantage out of it. ++ // Get the handle (the 2nd argument) ++ __ mv(oop_handle_reg, c_rarg1); + -+int SharedRuntime::java_calling_convention(const BasicType *sig_bt, -+ VMRegPair *regs, -+ int total_args_passed) { -+ // Create the mapping between argument positions and -+ // registers. -+ static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { -+ j_rarg0, j_rarg1, j_rarg2, j_rarg3, -+ j_rarg4, j_rarg5, j_rarg6, j_rarg7 -+ }; -+ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { -+ j_farg0, j_farg1, j_farg2, j_farg3, -+ j_farg4, j_farg5, j_farg6, j_farg7 -+ }; ++ // Get address of the box + -+ uint int_args = 0; -+ uint fp_args = 0; -+ uint stk_args = 0; // inc by 2 each time ++ __ la(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + -+ for (int i = 0; i < total_args_passed; i++) { -+ switch (sig_bt[i]) { -+ case T_BOOLEAN: // fall through -+ case T_CHAR: // fall through -+ case T_BYTE: // fall through -+ case T_SHORT: // fall through -+ case T_INT: -+ if (int_args < Argument::n_int_register_parameters_j) { -+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_VOID: -+ // halves of T_LONG or T_DOUBLE -+ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); -+ regs[i].set_bad(); -+ break; -+ case T_LONG: // fall through -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ case T_OBJECT: // fall through -+ case T_ARRAY: // fall through -+ case T_ADDRESS: -+ if (int_args < Argument::n_int_register_parameters_j) { -+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_FLOAT: -+ if (fp_args < Argument::n_float_register_parameters_j) { -+ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_DOUBLE: -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ if (fp_args < Argument::n_float_register_parameters_j) { -+ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else { -+ regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ default: -+ ShouldNotReachHere(); ++ // Load the oop from the handle ++ __ ld(obj_reg, Address(oop_handle_reg, 0)); ++ ++ if (UseBiasedLocking) { ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); + } -+ } + -+ return align_up(stk_args, 2); -+} ++ // Load (object->mark() | 1) into swap_reg % x10 ++ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ __ ori(swap_reg, t0, 1); + -+// Patch the callers callsite with entry to compiled code if it exists. 
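The removed java_calling_convention above hands out up to eight integer argument registers (j_rarg0..j_rarg7) and eight floating-point argument registers (j_farg0..j_farg7), and spills anything beyond that to 4-byte stack slots in aligned pairs, with the second half of a long/double (the trailing T_VOID) getting no location at all. The standalone sketch below models that walk so the register/stack split is easier to follow; it is an illustration only, and the enum, struct and function names are invented here rather than HotSpot types.

    // Illustrative model of the java_calling_convention walk above (not HotSpot code).
    // "index" is a register number for the first eight int/fp args, or a 4-byte stack
    // slot (always handed out in pairs) once the registers run out.
    #include <cstdio>

    enum BT { B_INT, B_LONG, B_FLOAT, B_DOUBLE, B_OBJECT, B_VOID };

    struct Slot { bool on_stack; int index; };

    static int assign(const BT* sig, int n, Slot* out) {
      int int_args = 0, fp_args = 0, stk = 0;      // stk counts 4-byte slots, bumped by 2
      for (int i = 0; i < n; i++) {
        switch (sig[i]) {
          case B_VOID:                             // unused second half of a long/double
            out[i] = { false, -1 };
            break;
          case B_FLOAT:
          case B_DOUBLE:
            if (fp_args < 8) { out[i] = { false, fp_args++ }; }
            else             { out[i] = { true, stk }; stk += 2; }
            break;
          default:                                 // int-like, long, oop: integer registers
            if (int_args < 8) { out[i] = { false, int_args++ }; }
            else              { out[i] = { true, stk }; stk += 2; }
            break;
        }
      }
      return (stk + 1) & ~1;                       // align_up(stk_args, 2), as returned above
    }

    int main() {
      // (long, double, int): the long takes the first int register, the double the first
      // fp register, and the int the second int register; nothing spills to the stack.
      BT sig[] = { B_LONG, B_VOID, B_DOUBLE, B_VOID, B_INT };
      Slot out[5];
      printf("stack slots needed: %d\n", assign(sig, 5, out));
    }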
-+static void patch_callers_callsite(MacroAssembler *masm) { -+ assert_cond(masm != NULL); -+ Label L; -+ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); -+ __ beqz(t0, L); ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); + -+ __ enter(); -+ __ push_CPU_state(); ++ // src -> dest if dest == x10 else x10 <- dest ++ { ++ Label here; ++ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); ++ } + -+ // VM needs caller's callsite -+ // VM needs target method -+ // This needs to be a long call since we will relocate this adapter to -+ // the codeBuffer and it may not reach ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg + -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif ++ __ sub(swap_reg, swap_reg, sp); ++ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); + -+ __ mv(c_rarg0, xmethod); -+ __ mv(c_rarg1, ra); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); -+ __ jalr(x1, t0, offset); ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++ __ bnez(swap_reg, slow_path_lock); + -+ // Explicit fence.i required because fixup_callers_callsite may change the code -+ // stream. -+ __ safepoint_ifence(); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ } + -+ __ pop_CPU_state(); -+ // restore sp -+ __ leave(); -+ __ bind(L); -+} + -+static void gen_c2i_adapter(MacroAssembler *masm, -+ int total_args_passed, -+ int comp_args_on_stack, -+ const BasicType *sig_bt, -+ const VMRegPair *regs, -+ Label& skip_fixup) { -+ // Before we get into the guts of the C2I adapter, see if we should be here -+ // at all. We've come from compiled code and are attempting to jump to the -+ // interpreter, which means the caller made a static call to get here -+ // (vcalls always get a compiled target if there is one). Check for a -+ // compiled target. If there is one, we need to patch the caller's call. -+ patch_callers_callsite(masm); ++ // Finally just about ready to make the JNI call + -+ __ bind(skip_fixup); ++ // get JNIEnv* which is first argument to native ++ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); + -+ int words_pushed = 0; ++ // Now set thread in native ++ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); ++ __ mv(t0, _thread_in_native); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sw(t0, Address(t1)); + -+ // Since all args are passed on the stack, total_args_passed * -+ // Interpreter::stackElementSize is the space we need. ++ rt_call(masm, native_func); + -+ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ __ bind(native_return); + -+ __ mv(x30, sp); ++ intptr_t return_pc = (intptr_t) __ pc(); ++ oop_maps->add_gc_map(return_pc - start, map); + -+ // stack is aligned, keep it that way -+ extraspace = align_up(extraspace, 2 * wordSize); ++ // Unpack native results. 
++ if (ret_type != T_OBJECT && ret_type != T_ARRAY) { ++ __ cast_primitive_type(ret_type, x10); ++ } + -+ if (extraspace) { -+ __ sub(sp, sp, extraspace); ++ Label safepoint_in_progress, safepoint_in_progress_done; ++ Label after_transition; ++ ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ mv(t0, _thread_in_native_trans); ++ ++ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++ ++ // Force this write out before the read below ++ __ membar(MacroAssembler::AnyAny); ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ __ safepoint_poll_acquire(safepoint_in_progress); ++ __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); ++ __ bnez(t0, safepoint_in_progress); ++ __ bind(safepoint_in_progress_done); + } + -+ // Now write the args into the outgoing interpreter space -+ for (int i = 0; i < total_args_passed; i++) { -+ if (sig_bt[i] == T_VOID) { -+ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); -+ continue; -+ } ++ // change thread state ++ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); ++ __ mv(t0, _thread_in_Java); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sw(t0, Address(t1)); ++ __ bind(after_transition); + -+ // offset to start parameters -+ int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; -+ int next_off = st_off - Interpreter::stackElementSize; ++ Label reguard; ++ Label reguard_done; ++ __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); ++ __ mv(t1, JavaThread::stack_guard_yellow_reserved_disabled); ++ __ beq(t0, t1, reguard); ++ __ bind(reguard_done); + -+ // Say 4 args: -+ // i st_off -+ // 0 32 T_LONG -+ // 1 24 T_VOID -+ // 2 16 T_OBJECT -+ // 3 8 T_BOOL -+ // - 0 return address -+ // -+ // However to make thing extra confusing. Because we can fit a Java long/double in -+ // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter -+ // leaves one slot empty and only stores to a single slot. In this case the -+ // slot that is occupied is the T_VOID slot. See I said it was confusing. 
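The displaced-header test in the synchronized-method locking sequence earlier collapses "(mark & 3) == 0 and sp <= mark < sp + page size" into the single expression ((mark - sp) & (3 - os::vm_page_size())). A quick arithmetic check of that mask, with 4096 standing in for os::vm_page_size(), follows; it is an illustration only, not HotSpot code.

    // Quick check of the stack-lock test used by the locking sequence above.
    // The expression is zero exactly when the mark word is 4-byte aligned and lies
    // within the current stack page, i.e. when the lock is a recursive stack lock.
    #include <cstdint>
    #include <cstdio>

    int main() {
      const intptr_t page = 4096;                 // stand-in for os::vm_page_size()
      const intptr_t sp   = 0x7ffff000;           // pretend stack pointer, page aligned
      const intptr_t marks[] = {
        sp + 0x40,                                // lock word on this page: recursive, zero
        sp + 0x41,                                // low bits set: not a displaced header
        sp + 2 * page                             // outside the page: owned elsewhere
      };
      for (intptr_t mark : marks) {
        intptr_t r = (mark - sp) & (3 - page);    // the expression the assembly evaluates
        printf("mark - sp = %#lx -> %s\n", (long)(mark - sp),
               r == 0 ? "recursive (stay on fast path)" : "non-zero (slow_path_lock)");
      }
    }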
++ // native result if any is live + -+ VMReg r_1 = regs[i].first(); -+ VMReg r_2 = regs[i].second(); -+ if (!r_1->is_valid()) { -+ assert(!r_2->is_valid(), ""); -+ continue; -+ } -+ if (r_1->is_stack()) { -+ // memory to memory use t0 -+ int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size -+ + extraspace -+ + words_pushed * wordSize); -+ if (!r_2->is_valid()) { -+ __ lwu(t0, Address(sp, ld_off)); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+ } else { -+ __ ld(t0, Address(sp, ld_off), /*temp register*/esp); ++ // Unlock ++ Label unlock_done; ++ Label slow_path_unlock; ++ if (method->is_synchronized()) { + -+ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG -+ // T_DOUBLE and T_LONG use two slots in the interpreter -+ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { -+ // ld_off == LSW, ld_off+wordSize == MSW -+ // st_off == MSW, next_off == LSW -+ __ sd(t0, Address(sp, next_off), /*temp register*/esp); -+#ifdef ASSERT -+ // Overwrite the unused slot with known junk -+ __ li(t0, 0xdeadffffdeadaaaaul); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+#endif /* ASSERT */ -+ } else { -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+ } -+ } -+ } else if (r_1->is_Register()) { -+ Register r = r_1->as_Register(); -+ if (!r_2->is_valid()) { -+ // must be only an int (or less ) so move only 32bits to slot -+ __ sd(r, Address(sp, st_off)); -+ } else { -+ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG -+ // T_DOUBLE and T_LONG use two slots in the interpreter -+ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { -+ // long/double in gpr -+#ifdef ASSERT -+ // Overwrite the unused slot with known junk -+ __ li(t0, 0xdeadffffdeadaaabul); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+#endif /* ASSERT */ -+ __ sd(r, Address(sp, next_off)); -+ } else { -+ __ sd(r, Address(sp, st_off)); -+ } -+ } -+ } else { -+ assert(r_1->is_FloatRegister(), ""); -+ if (!r_2->is_valid()) { -+ // only a float use just part of the slot -+ __ fsw(r_1->as_FloatRegister(), Address(sp, st_off)); -+ } else { -+#ifdef ASSERT -+ // Overwrite the unused slot with known junk -+ __ li(t0, 0xdeadffffdeadaaacul); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+#endif /* ASSERT */ -+ __ fsd(r_1->as_FloatRegister(), Address(sp, next_off)); -+ } -+ } -+ } -+ -+ __ mv(esp, sp); // Interp expects args on caller's expression stack ++ // Get locked oop from the handle we passed to jni ++ __ ld(obj_reg, Address(oop_handle_reg, 0)); + -+ __ ld(t0, Address(xmethod, in_bytes(Method::interpreter_entry_offset()))); -+ __ jr(t0); -+} ++ Label done; + -+void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, -+ int total_args_passed, -+ int comp_args_on_stack, -+ const BasicType *sig_bt, -+ const VMRegPair *regs) { -+ // Cut-out for having no stack args. -+ int comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord; -+ if (comp_args_on_stack != 0) { -+ __ sub(t0, sp, comp_words_on_stack * wordSize); -+ __ andi(sp, t0, -16); -+ } ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, old_hdr, done); ++ } + -+ // Will jump to the compiled code just as if compiled code was doing it. -+ // Pre-load the register-jump target early, to schedule it better. -+ __ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_offset()))); ++ // Simple recursive lock? 
++ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ __ beqz(t0, done); + -+ // Now generate the shuffle code. -+ for (int i = 0; i < total_args_passed; i++) { -+ if (sig_bt[i] == T_VOID) { -+ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); -+ continue; ++ // Must save x10 if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); + } + -+ // Pick up 0, 1 or 2 words from SP+offset. ++ // get address of the stack lock ++ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ // get old displaced header ++ __ ld(old_hdr, Address(x10, 0)); + -+ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), -+ "scrambled load targets?"); -+ // Load in argument order going down. -+ int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; -+ // Point to interpreter value (vs. tag) -+ int next_off = ld_off - Interpreter::stackElementSize; ++ // Atomic swap old header if oop still contains the stack lock ++ Label succeed; ++ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); ++ __ bind(succeed); + -+ VMReg r_1 = regs[i].first(); -+ VMReg r_2 = regs[i].second(); -+ if (!r_1->is_valid()) { -+ assert(!r_2->is_valid(), ""); -+ continue; ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); + } -+ if (r_1->is_stack()) { -+ // Convert stack slot to an SP offset (+ wordSize to account for return address ) -+ int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size; -+ if (!r_2->is_valid()) { -+ __ lw(t0, Address(esp, ld_off)); -+ __ sd(t0, Address(sp, st_off), /*temp register*/t2); -+ } else { -+ // -+ // We are using two optoregs. This can be either T_OBJECT, -+ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates -+ // two slots but only uses one for thr T_LONG or T_DOUBLE case -+ // So we must adjust where to pick up the data to match the -+ // interpreter. -+ // -+ // Interpreter local[n] == MSW, local[n+1] == LSW however locals -+ // are accessed as negative so LSW is at LOW address -+ -+ // ld_off is MSW so get LSW -+ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? -+ next_off : ld_off; -+ __ ld(t0, Address(esp, offset)); -+ // st_off is LSW (i.e. reg.first()) -+ __ sd(t0, Address(sp, st_off), /*temp register*/t2); -+ } -+ } else if (r_1->is_Register()) { // Register argument -+ Register r = r_1->as_Register(); -+ if (r_2->is_valid()) { -+ // -+ // We are using two VMRegs. This can be either T_OBJECT, -+ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates -+ // two slots but only uses one for thr T_LONG or T_DOUBLE case -+ // So we must adjust where to pick up the data to match the -+ // interpreter. + -+ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? -+ next_off : ld_off; ++ __ bind(done); ++ } + -+ // this can be a misaligned move -+ __ ld(r, Address(esp, offset)); -+ } else { -+ // sign extend and use a full word? 
-+ __ lw(r, Address(esp, ld_off)); -+ } -+ } else { -+ if (!r_2->is_valid()) { -+ __ flw(r_1->as_FloatRegister(), Address(esp, ld_off)); -+ } else { -+ __ fld(r_1->as_FloatRegister(), Address(esp, next_off)); -+ } -+ } ++ Label dtrace_method_exit, dtrace_method_exit_done; ++ { ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); ++ __ lbu(t0, Address(t0, offset)); ++ __ bnez(t0, dtrace_method_exit); ++ __ bind(dtrace_method_exit_done); + } + -+ // 6243940 We might end up in handle_wrong_method if -+ // the callee is deoptimized as we race thru here. If that -+ // happens we don't want to take a safepoint because the -+ // caller frame will look interpreted and arguments are now -+ // "compiled" so it is much better to make this transition -+ // invisible to the stack walking code. Unfortunately if -+ // we try and find the callee by normal means a safepoint -+ // is possible. So we stash the desired callee in the thread -+ // and the vm will find there should this case occur. ++ __ reset_last_Java_frame(false); + -+ __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset())); ++ // Unbox oop result, e.g. JNIHandles::resolve result. ++ if (is_reference_type(ret_type)) { ++ __ resolve_jobject(x10, xthread, t1); ++ } + -+ __ jr(t1); -+} ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); ++ } + -+// --------------------------------------------------------------- -+AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, -+ int total_args_passed, -+ int comp_args_on_stack, -+ const BasicType *sig_bt, -+ const VMRegPair *regs, -+ AdapterFingerPrint* fingerprint) { -+ address i2c_entry = __ pc(); -+ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ // reset handle block ++ __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); ++ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); + -+ address c2i_unverified_entry = __ pc(); -+ Label skip_fixup; ++ __ leave(); + -+ Label ok; ++ // Any exception pending? ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ bnez(t0, exception_pending); + -+ const Register holder = t1; -+ const Register receiver = j_rarg0; -+ const Register tmp = t2; // A call-clobbered register not used for arg passing ++ // We're done ++ __ ret(); + -+ // ------------------------------------------------------------------------- -+ // Generate a C2I adapter. On entry we know xmethod holds the Method* during calls -+ // to the interpreter. The args start out packed in the compiled layout. They -+ // need to be unpacked into the interpreter layout. This will almost always -+ // require some stack space. We grow the current (compiled) stack, then repack -+ // the args. We finally end in a jump to the generic interpreter entry point. -+ // On exit from the interpreter, the interpreter will restore our SP (lest the -+ // compiled code, which relys solely on SP and not FP, get sick). 
++ // Unexpected paths are out of line and go here + -+ { -+ __ block_comment("c2i_unverified_entry {"); -+ __ load_klass(t0, receiver); -+ __ ld(tmp, Address(holder, CompiledICHolder::holder_klass_offset())); -+ __ ld(xmethod, Address(holder, CompiledICHolder::holder_metadata_offset())); -+ __ beq(t0, tmp, ok); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ // forward the exception ++ __ bind(exception_pending); + -+ __ bind(ok); -+ // Method might have been compiled since the call site was patched to -+ // interpreted; if that is the case treat it as a miss so we can get -+ // the call site corrected. -+ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); -+ __ beqz(t0, skip_fixup); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); -+ __ block_comment("} c2i_unverified_entry"); -+ } ++ // and forward the exception ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + -+ address c2i_entry = __ pc(); ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { + -+ // Class initialization barrier for static methods -+ address c2i_no_clinit_check_entry = NULL; -+ if (VM_Version::supports_fast_class_init_checks()) { -+ Label L_skip_barrier; ++ __ block_comment("Slow path lock {"); ++ __ bind(slow_path_lock); + -+ { // Bypass the barrier for non-static methods -+ __ lwu(t0, Address(xmethod, Method::access_flags_offset())); -+ __ andi(t1, t0, JVM_ACC_STATIC); -+ __ beqz(t1, L_skip_barrier); // non-static -+ } ++ // has last_Java_frame setup. No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) + -+ __ load_method_holder(t1, xmethod); -+ __ clinit_barrier(t1, t0, &L_skip_barrier); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); + -+ __ bind(L_skip_barrier); -+ c2i_no_clinit_check_entry = __ pc(); -+ } ++ __ mv(c_rarg0, obj_reg); ++ __ mv(c_rarg1, lock_reg); ++ __ mv(c_rarg2, xthread); + -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->c2i_entry_barrier(masm); ++ // Not a leaf but we have last_Java_frame setup as we want ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); ++ restore_args(masm, total_c_args, c_arg, out_regs); + -+ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++#ifdef ASSERT ++ { Label L; ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ beqz(t0, L); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ j(lock_done); + -+ __ flush(); -+ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); -+} ++ __ block_comment("} Slow path lock"); + -+int SharedRuntime::vector_calling_convention(VMRegPair *regs, -+ uint num_bits, -+ uint total_args_passed) { -+ Unimplemented(); -+ return 0; -+} ++ __ block_comment("Slow path unlock {"); ++ __ bind(slow_path_unlock); + -+int SharedRuntime::c_calling_convention(const BasicType *sig_bt, -+ VMRegPair *regs, -+ VMRegPair *regs2, -+ int total_args_passed) { -+ assert(regs2 == NULL, "not needed on riscv"); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { ++ save_native_result(masm, ret_type, stack_slots); ++ } + -+ // We return the amount of VMRegImpl stack slots we need to reserve for all -+ // the arguments NOT counting 
out_preserve_stack_slots. ++ __ mv(c_rarg2, xthread); ++ __ la(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ __ mv(c_rarg0, obj_reg); + -+ static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { -+ c_rarg0, c_rarg1, c_rarg2, c_rarg3, -+ c_rarg4, c_rarg5, c_rarg6, c_rarg7 -+ }; -+ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { -+ c_farg0, c_farg1, c_farg2, c_farg3, -+ c_farg4, c_farg5, c_farg6, c_farg7 -+ }; ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ // NOTE that obj_reg == x9 currently ++ __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + -+ uint int_args = 0; -+ uint fp_args = 0; -+ uint stk_args = 0; // inc by 2 each time ++ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); + -+ for (int i = 0; i < total_args_passed; i++) { -+ switch (sig_bt[i]) { -+ case T_BOOLEAN: // fall through -+ case T_CHAR: // fall through -+ case T_BYTE: // fall through -+ case T_SHORT: // fall through -+ case T_INT: -+ if (int_args < Argument::n_int_register_parameters_c) { -+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_LONG: // fall through -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ case T_OBJECT: // fall through -+ case T_ARRAY: // fall through -+ case T_ADDRESS: // fall through -+ case T_METADATA: -+ if (int_args < Argument::n_int_register_parameters_c) { -+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_FLOAT: -+ if (fp_args < Argument::n_float_register_parameters_c) { -+ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else if (int_args < Argument::n_int_register_parameters_c) { -+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_DOUBLE: -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ if (fp_args < Argument::n_float_register_parameters_c) { -+ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else if (int_args < Argument::n_int_register_parameters_c) { -+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_VOID: // Halves of longs and doubles -+ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); -+ regs[i].set_bad(); -+ break; -+ default: -+ ShouldNotReachHere(); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ beqz(t0, L); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); + } -+ } ++#endif /* ASSERT */ + -+ return stk_args; -+} ++ __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + -+// On 64 bit we will store integer like items to the stack as -+// 64 bits items (riscv64 abi) even though java would only store -+// 32bits for a parameter. 
On 32bit it will simply be 32 bits -+// So this routine will do 32->32 on 32bit and 32->64 on 64bit -+static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { -+ assert_cond(masm != NULL); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ // stack to stack -+ __ ld(t0, Address(fp, reg2offset_in(src.first()))); -+ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ // stack to reg -+ __ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } -+ } else if (dst.first()->is_stack()) { -+ // reg to stack -+ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ if (dst.first() != src.first()) { -+ // 32bits extend sign -+ __ addw(dst.first()->as_Register(), src.first()->as_Register(), zr); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { ++ restore_native_result(masm, ret_type, stack_slots); + } -+ } -+} -+ -+// An oop arg. Must pass a handle not the oop itself -+static void object_move(MacroAssembler* masm, -+ OopMap* map, -+ int oop_handle_offset, -+ int framesize_in_slots, -+ VMRegPair src, -+ VMRegPair dst, -+ bool is_receiver, -+ int* receiver_offset) { -+ assert_cond(masm != NULL && map != NULL && receiver_offset != NULL); -+ // must pass a handle. First figure out the location we use as a handle -+ Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register(); -+ -+ // See if oop is NULL if it is we need no handle ++ __ j(unlock_done); + -+ if (src.first()->is_stack()) { ++ __ block_comment("} Slow path unlock"); + -+ // Oop is already on the stack as an argument -+ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); -+ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); -+ if (is_receiver) { -+ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; -+ } ++ } // synchronized + -+ __ ld(t0, Address(fp, reg2offset_in(src.first()))); -+ __ la(rHandle, Address(fp, reg2offset_in(src.first()))); -+ // conditionally move a NULL -+ Label notZero1; -+ __ bnez(t0, notZero1); -+ __ mv(rHandle, zr); -+ __ bind(notZero1); -+ } else { ++ // SLOW PATH Reguard the stack if needed + -+ // Oop is in an a register we must store it to the space we reserve -+ // on the stack for oop_handles and pass a handle if oop is non-NULL ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); ++ restore_native_result(masm, ret_type, stack_slots); ++ // and continue ++ __ j(reguard_done); + -+ const Register rOop = src.first()->as_Register(); -+ int oop_slot = -1; -+ if (rOop == j_rarg0) { -+ oop_slot = 0; -+ } else if (rOop == j_rarg1) { -+ oop_slot = 1; -+ } else if (rOop == j_rarg2) { -+ oop_slot = 2; -+ } else if (rOop == j_rarg3) { -+ oop_slot = 3; -+ } else if (rOop == j_rarg4) { -+ oop_slot = 4; -+ } else if (rOop == j_rarg5) { -+ oop_slot = 5; -+ } else if (rOop == j_rarg6) { -+ oop_slot = 6; -+ } else { -+ assert(rOop == j_rarg7, "wrong register"); -+ oop_slot = 7; -+ } ++ // SLOW PATH safepoint ++ { ++ __ block_comment("safepoint {"); ++ __ bind(safepoint_in_progress); + -+ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; -+ int offset = oop_slot * VMRegImpl::stack_slot_size; ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. 
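object_move above never hands a raw oop to the native callee: the oop is stored into a reserved, GC-visible stack slot and the callee receives the address of that slot, or a NULL pointer when the oop itself is NULL. A minimal model of that handlization rule is sketched below; the typedefs and helper are invented for the sketch and are not HotSpot code.

    // Minimal model of the JNI handlization done by object_move above.
    // The callee only ever sees &slot (a handle), never the raw oop, and a NULL
    // oop becomes a NULL handle rather than a handle to a slot containing NULL.
    #include <cstdio>

    typedef void* oop;        // stand-in for a raw Java object pointer
    typedef oop*  handle_t;   // stand-in for the jobject the native code receives

    static handle_t handlize(oop value, oop* slot_in_frame) {
      *slot_in_frame = value;                        // recorded in the frame's oop map
      return value != nullptr ? slot_in_frame : nullptr;
    }

    int main() {
      oop slot = nullptr;
      int obj = 42;                                  // pretend object
      handle_t h1 = handlize((oop)&obj, &slot);      // non-NULL oop -> handle to the slot
      handle_t h2 = handlize(nullptr,  &slot);       // NULL oop     -> NULL handle
      printf("h1 %s NULL, h2 %s NULL\n", h1 ? "!=" : "==", h2 ? "!=" : "==");
    }

The return path applies the inverse of this contract: resolve_jobject, used earlier when unpacking an object result, turns the handle returned by the native function back into an oop.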
++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ mv(c_rarg0, xthread); ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); ++ __ jalr(x1, t0, offset); + -+ map->set_oop(VMRegImpl::stack2reg(oop_slot)); -+ // Store oop in handle area, may be NULL -+ __ sd(rOop, Address(sp, offset)); -+ if (is_receiver) { -+ *receiver_offset = offset; -+ } ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); + -+ //rOop maybe the same as rHandle -+ if (rOop == rHandle) { -+ Label isZero; -+ __ beqz(rOop, isZero); -+ __ la(rHandle, Address(sp, offset)); -+ __ bind(isZero); -+ } else { -+ Label notZero2; -+ __ la(rHandle, Address(sp, offset)); -+ __ bnez(rOop, notZero2); -+ __ mv(rHandle, zr); -+ __ bind(notZero2); -+ } ++ __ j(safepoint_in_progress_done); ++ __ block_comment("} safepoint"); + } + -+ // If arg is on the stack then place it otherwise it is already in correct reg. -+ if (dst.first()->is_stack()) { -+ __ sd(rHandle, Address(sp, reg2offset_out(dst.first()))); -+ } -+} ++ // SLOW PATH dtrace support ++ { ++ __ block_comment("dtrace entry {"); ++ __ bind(dtrace_method_entry); + -+// A float arg may have to do float reg int reg conversion -+static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { -+ assert(src.first()->is_stack() && dst.first()->is_stack() || -+ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); -+ assert_cond(masm != NULL); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ __ lwu(t0, Address(fp, reg2offset_in(src.first()))); -+ __ sw(t0, Address(sp, reg2offset_out(dst.first()))); -+ } else if (dst.first()->is_Register()) { -+ __ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } else if (src.first() != dst.first()) { -+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { -+ __ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+} -+ -+// A long move -+static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { -+ assert_cond(masm != NULL); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ // stack to stack -+ __ ld(t0, Address(fp, reg2offset_in(src.first()))); -+ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ // stack to reg -+ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } -+ } else if (dst.first()->is_stack()) { -+ // reg to stack -+ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ if (dst.first() != src.first()) { -+ __ mv(dst.first()->as_Register(), src.first()->as_Register()); -+ } -+ } -+} -+ -+// A double move -+static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { -+ assert(src.first()->is_stack() && dst.first()->is_stack() || -+ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); -+ assert_cond(masm != NULL); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ __ ld(t0, Address(fp, reg2offset_in(src.first()))); -+ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); -+ } else if (dst.first()-> is_Register()) { -+ __ 
ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } else if (src.first() != dst.first()) { -+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { -+ __ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+} ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? + -+void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { -+ assert_cond(masm != NULL); -+ // We always ignore the frame_slots arg and just use the space just below frame pointer -+ // which by this time is free to use -+ switch (ret_type) { -+ case T_FLOAT: -+ __ fsw(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_DOUBLE: -+ __ fsd(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_VOID: break; -+ default: { -+ __ sd(x10, Address(fp, -3 * wordSize)); -+ } ++ save_args(masm, total_c_args, c_arg, out_regs); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ xthread, c_rarg1); ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ __ j(dtrace_method_entry_done); ++ __ block_comment("} dtrace entry"); + } -+} + -+void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { -+ assert_cond(masm != NULL); -+ // We always ignore the frame_slots arg and just use the space just below frame pointer -+ // which by this time is free to use -+ switch (ret_type) { -+ case T_FLOAT: -+ __ flw(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_DOUBLE: -+ __ fld(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_VOID: break; -+ default: { -+ __ ld(x10, Address(fp, -3 * wordSize)); -+ } ++ { ++ __ block_comment("dtrace exit {"); ++ __ bind(dtrace_method_exit); ++ save_native_result(masm, ret_type, stack_slots); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ xthread, c_rarg1); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ j(dtrace_method_exit_done); ++ __ block_comment("} dtrace exit"); + } -+} + -+static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { -+ assert_cond(masm != NULL && args != NULL); -+ RegSet x; -+ for ( int i = first_arg ; i < arg_count ; i++ ) { -+ if (args[i].first()->is_Register()) { -+ x = x + args[i].first()->as_Register(); -+ } else if (args[i].first()->is_FloatRegister()) { -+ __ addi(sp, sp, -2 * wordSize); -+ __ fsd(args[i].first()->as_FloatRegister(), Address(sp, 0)); -+ } -+ } -+ __ push_reg(x, sp); -+} ++ __ flush(); + -+static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { -+ assert_cond(masm != NULL && args != NULL); -+ RegSet x; -+ for ( int i = first_arg ; i < arg_count ; i++ ) { -+ if (args[i].first()->is_Register()) { -+ x = x + args[i].first()->as_Register(); -+ } else { -+ ; -+ } -+ } -+ __ pop_reg(x, sp); -+ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { -+ if (args[i].first()->is_Register()) { -+ ; -+ } else if (args[i].first()->is_FloatRegister()) { -+ __ fld(args[i].first()->as_FloatRegister(), Address(sp, 0)); -+ __ add(sp, sp, 2 * wordSize); -+ } -+ } ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / 
VMRegImpl::slots_per_word, ++ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ assert(nm != NULL, "create native nmethod fail!"); ++ return nm; +} + -+static void rt_call(MacroAssembler* masm, address dest) { -+ assert_cond(masm != NULL); -+ CodeBlob *cb = CodeCache::find_blob(dest); -+ if (cb) { -+ __ far_call(RuntimeAddress(dest)); -+ } else { -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(dest), offset); -+ __ jalr(x1, t0, offset); ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ assert(callee_locals >= callee_parameters, ++ "test and remove; got more parms than locals"); ++ if (callee_locals < callee_parameters) { ++ return 0; // No adjustment for negative locals + } ++ int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++ // diff is counted in stack words ++ return align_up(diff, 2); +} + -+static void verify_oop_args(MacroAssembler* masm, -+ const methodHandle& method, -+ const BasicType* sig_bt, -+ const VMRegPair* regs) { -+ const Register temp_reg = x9; // not part of any compiled calling seq -+ if (VerifyOops) { -+ for (int i = 0; i < method->size_of_parameters(); i++) { -+ if (sig_bt[i] == T_OBJECT || -+ sig_bt[i] == T_ARRAY) { -+ VMReg r = regs[i].first(); -+ assert(r->is_valid(), "bad oop arg"); -+ if (r->is_stack()) { -+ __ ld(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); -+ __ verify_oop(temp_reg); -+ } else { -+ __ verify_oop(r->as_Register()); -+ } -+ } -+ } -+ } -+} ++//------------------------------generate_deopt_blob---------------------------- ++void SharedRuntime::generate_deopt_blob() { ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ int pad = 0; ++ CodeBuffer buffer("deopt_blob", 2048 + pad, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ int frame_size_in_words = -1; ++ OopMap* map = NULL; ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(masm != NULL && oop_maps != NULL); ++ RegisterSaver reg_saver; + -+static void gen_special_dispatch(MacroAssembler* masm, -+ const methodHandle& method, -+ const BasicType* sig_bt, -+ const VMRegPair* regs) { -+ verify_oop_args(masm, method, sig_bt, regs); -+ vmIntrinsics::ID iid = method->intrinsic_id(); ++ // ------------- ++ // This code enters when returning to a de-optimized nmethod. A return ++ // address has been pushed on the the stack, and return values are in ++ // registers. ++ // If we are doing a normal deopt then we were called from the patched ++ // nmethod from the point we returned to the nmethod. So the return ++ // address on the stack is wrong by NativeCall::instruction_size ++ // We will adjust the value so it looks like we have the original return ++ // address on the stack (like when we eagerly deoptimized). ++ // In the case of an exception pending when deoptimizing, we enter ++ // with a return address on the stack that points after the call we patched ++ // into the exception handler. We have the following register state from, ++ // e.g., the forward exception stub (see stubGenerator_riscv.cpp). ++ // x10: exception oop ++ // x9: exception handler ++ // x13: throwing pc ++ // So in this case we simply jam x13 into the useless return address and ++ // the stack looks just like we want. 
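Deoptimization::last_frame_adjust above grows a c2i activation by the number of interpreter words the callee's extra locals need, rounded up to an even word count. A quick model of that arithmetic follows; it is an illustration only and assumes Interpreter::stackElementWords == 1, i.e. one machine word per interpreter stack element on a 64-bit port.

    // Model of last_frame_adjust above (not HotSpot code; stackElementWords assumed 1).
    #include <cstdio>

    static int last_frame_adjust_model(int callee_parameters, int callee_locals) {
      const int stackElementWords = 1;
      if (callee_locals < callee_parameters) {
        return 0;                                   // no adjustment for "negative" locals
      }
      int diff = (callee_locals - callee_parameters) * stackElementWords;
      return (diff + 1) & ~1;                       // align_up(diff, 2)
    }

    int main() {
      // A callee with 3 parameters and 7 locals needs 4 extra words; with 8 locals it
      // needs 5, which rounds up to 6 so the adjusted frame stays 16-byte aligned.
      printf("%d %d\n", last_frame_adjust_model(3, 7), last_frame_adjust_model(3, 8));
    }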
++ // ++ // At this point we need to de-opt. We save the argument return ++ // registers. We call the first C routine, fetch_unroll_info(). This ++ // routine captures the return values and returns a structure which ++ // describes the current frame size and the sizes of all replacement frames. ++ // The current frame is compiled code and may contain many inlined ++ // functions, each with their own JVM state. We pop the current frame, then ++ // push all the new frames. Then we call the C routine unpack_frames() to ++ // populate these frames. Finally unpack_frames() returns us the new target ++ // address. Notice that callee-save registers are BLOWN here; they have ++ // already been captured in the vframeArray at the time the return PC was ++ // patched. ++ address start = __ pc(); ++ Label cont; + -+ // Now write the args into the outgoing interpreter space -+ bool has_receiver = false; -+ Register receiver_reg = noreg; -+ int member_arg_pos = -1; -+ Register member_reg = noreg; -+ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); -+ if (ref_kind != 0) { -+ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument -+ member_reg = x9; // known to be free at this point -+ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); -+ } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { -+ has_receiver = true; -+ } else { -+ fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); -+ } ++ // Prolog for non exception case! + -+ if (member_reg != noreg) { -+ // Load the member_arg into register, if necessary. -+ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); -+ VMReg r = regs[member_arg_pos].first(); -+ if (r->is_stack()) { -+ __ ld(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); -+ } else { -+ // no data motion is needed -+ member_reg = r->as_Register(); -+ } -+ } ++ // Save everything in sight. ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ if (has_receiver) { -+ // Make sure the receiver is loaded into a register. -+ assert(method->size_of_parameters() > 0, "oob"); -+ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); -+ VMReg r = regs[0].first(); -+ assert(r->is_valid(), "bad receiver arg"); -+ if (r->is_stack()) { -+ // Porting note: This assumes that compiled calling conventions always -+ // pass the receiver oop in a register. If this is not true on some -+ // platform, pick a temp and load the receiver from stack. -+ fatal("receiver always in a register"); -+ receiver_reg = x12; // known to be free at this point -+ __ ld(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); -+ } else { -+ // no data motion is needed -+ receiver_reg = r->as_Register(); -+ } -+ } ++ // Normal deoptimization. Save exec mode for unpack_frames. ++ __ mvw(xcpool, Deoptimization::Unpack_deopt); // callee-saved ++ __ j(cont); + -+ // Figure out which address we are really jumping to: -+ MethodHandles::generate_method_handle_dispatch(masm, iid, -+ receiver_reg, member_reg, /*for_compiler_entry:*/ true); -+} ++ int reexecute_offset = __ pc() - start; + -+// --------------------------------------------------------------------------- -+// Generate a native wrapper for a given method. 
The method takes arguments -+// in the Java compiled code convention, marshals them to the native -+// convention (handlizes oops, etc), transitions to native, makes the call, -+// returns to java state (possibly blocking), unhandlizes any result and -+// returns. -+// -+// Critical native functions are a shorthand for the use of -+// GetPrimtiveArrayCritical and disallow the use of any other JNI -+// functions. The wrapper is expected to unpack the arguments before -+// passing them to the callee and perform checks before and after the -+// native call to ensure that they GCLocker -+// lock_critical/unlock_critical semantics are followed. Some other -+// parts of JNI setup are skipped like the tear down of the JNI handle -+// block and the check for pending exceptions it's impossible for them -+// to be thrown. -+// -+// They are roughly structured like this: -+// if (GCLocker::needs_gc()) SharedRuntime::block_for_jni_critical() -+// tranistion to thread_in_native -+// unpack arrray arguments and call native entry point -+// check for safepoint in progress -+// check if any thread suspend flags are set -+// call into JVM and possible unlock the JNI critical -+// if a GC was suppressed while in the critical native. -+// transition back to thread_in_Java -+// return to caller -+// -+nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, -+ const methodHandle& method, -+ int compile_id, -+ BasicType* in_sig_bt, -+ VMRegPair* in_regs, -+ BasicType ret_type) { -+ if (method->is_method_handle_intrinsic()) { -+ vmIntrinsics::ID iid = method->intrinsic_id(); -+ intptr_t start = (intptr_t)__ pc(); -+ int vep_offset = ((intptr_t)__ pc()) - start; ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at + -+ // First instruction must be a nop as it may need to be patched on deoptimisation -+ __ nop(); -+ gen_special_dispatch(masm, -+ method, -+ in_sig_bt, -+ in_regs); -+ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period -+ __ flush(); -+ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually -+ return nmethod::new_native_nmethod(method, -+ compile_id, -+ masm->code(), -+ vep_offset, -+ frame_complete, -+ stack_slots / VMRegImpl::slots_per_word, -+ in_ByteSize(-1), -+ in_ByteSize(-1), -+ (OopMapSet*)NULL); -+ } -+ address native_func = method->native_function(); -+ assert(native_func != NULL, "must have function"); ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ // An OopMap for lock (and class if static) -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); -+ intptr_t start = (intptr_t)__ pc(); ++ __ mvw(xcpool, Deoptimization::Unpack_reexecute); // callee-saved ++ __ j(cont); + -+ // We have received a description of where all the java arg are located -+ // on entry to the wrapper. We need to convert these args to where -+ // the jni function will expect them. To figure out where they go -+ // we convert the java signature to a C signature by inserting -+ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ int exception_offset = __ pc() - start; + -+ const int total_in_args = method->size_of_parameters(); -+ int total_c_args = total_in_args + (method->is_static() ? 
2 : 1); ++ // Prolog for exception case + -+ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); -+ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); -+ BasicType* in_elem_bt = NULL; ++ // all registers are dead at this entry point, except for x10, and ++ // x13 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. + -+ int argc = 0; -+ out_sig_bt[argc++] = T_ADDRESS; -+ if (method->is_static()) { -+ out_sig_bt[argc++] = T_OBJECT; -+ } ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); + -+ for (int i = 0; i < total_in_args ; i++) { -+ out_sig_bt[argc++] = in_sig_bt[i]; -+ } ++ int exception_in_tls_offset = __ pc() - start; + -+ // Now figure out where the args must be stored and how much stack space -+ // they require. -+ int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ // new implementation because exception oop is now passed in JavaThread + -+ // Compute framesize for the wrapper. We need to handlize all oops in -+ // incoming registers ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) + -+ // Calculate the total number of stack slots we will need. ++ // The return address pushed by save_live_registers will be patched ++ // later with the throwing pc. The correct value is not available ++ // now because loading it from memory would destroy registers. + -+ // First count the abi requirement plus all of the outgoing args -+ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ // NB: The SP at this point must be the SP of the method that is ++ // being deoptimized. Deoptimization assumes that the frame created ++ // here by save_live_registers is immediately below the method's SP. ++ // This is a somewhat fragile mechanism. + -+ // Now the space for the inbound oop handle area -+ int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers ++ // Save everything in sight. ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ int oop_handle_offset = stack_slots; -+ stack_slots += total_save_slots; ++ // Now it is safe to overwrite any register + -+ // Now any space we need for handlizing a klass if static method ++ // Deopt during an exception. Save exec mode for unpack_frames. ++ __ li(xcpool, Deoptimization::Unpack_exception); // callee-saved + -+ int klass_slot_offset = 0; -+ int klass_offset = -1; -+ int lock_slot_offset = 0; -+ bool is_static = false; ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. 
Then clear the field in JavaThread + -+ if (method->is_static()) { -+ klass_slot_offset = stack_slots; -+ stack_slots += VMRegImpl::slots_per_word; -+ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; -+ is_static = true; -+ } ++ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ __ sd(x13, Address(fp, frame::return_addr_offset * wordSize)); ++ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); + -+ // Plus a lock if needed ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ verify_oop(x10); + -+ if (method->is_synchronized()) { -+ lock_slot_offset = stack_slots; -+ stack_slots += VMRegImpl::slots_per_word; -+ } ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, no_pending_exception); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif + -+ // Now a place (+2) to save return values or temp during shuffling -+ // + 4 for return address (which we own) and saved fp -+ stack_slots += 6; ++ __ bind(cont); + -+ // Ok The space we have allocated will look like: -+ // -+ // -+ // FP-> | | -+ // | 2 slots (ra) | -+ // | 2 slots (fp) | -+ // |---------------------| -+ // | 2 slots for moves | -+ // |---------------------| -+ // | lock box (if sync) | -+ // |---------------------| <- lock_slot_offset -+ // | klass (if static) | -+ // |---------------------| <- klass_slot_offset -+ // | oopHandle area | -+ // |---------------------| <- oop_handle_offset (8 java arg registers) -+ // | outbound memory | -+ // | based arguments | -+ // | | -+ // |---------------------| -+ // | | -+ // SP-> | out_preserved_slots | -+ // ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. + // ++ // UnrollBlock* fetch_unroll_info(JavaThread* thread) + ++ // fetch_unroll_info needs to call last_java_frame(). + -+ // Now compute actual number of stack words we need rounding to make -+ // stack properly aligned. -+ stack_slots = align_up(stack_slots, StackAlignmentInSlots); -+ -+ int stack_size = stack_slots * VMRegImpl::stack_slot_size; -+ -+ // First thing make an ic check to see if we should even be here -+ -+ // We are free to use all registers as temps without saving them and -+ // restoring them except fp. fp is the only callee save register -+ // as far as the interpreter and the compiler(s) are concerned. 
-+ -+ -+ const Register ic_reg = t1; -+ const Register receiver = j_rarg0; -+ -+ Label hit; -+ Label exception_pending; -+ -+ assert_different_registers(ic_reg, receiver, t0); -+ __ verify_oop(receiver); -+ __ cmp_klass(receiver, ic_reg, t0, hit); -+ -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); -+ -+ // Verified entry point must be aligned -+ __ align(8); -+ -+ __ bind(hit); -+ -+ int vep_offset = ((intptr_t)__ pc()) - start; ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(t0, Address(xthread, ++ JavaThread::last_Java_fp_offset())); ++ __ beqz(t0, L); ++ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ __ mv(c_rarg0, xthread); ++ __ mv(c_rarg1, xcpool); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); + -+ // If we have to make this method not-entrant we'll overwrite its -+ // first instruction with a jump. -+ __ nop(); ++ // Need to have an oopmap that tells fetch_unroll_info where to ++ // find any register it might need. ++ oop_maps->add_gc_map(__ pc() - start, map); + -+ if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { -+ Label L_skip_barrier; -+ __ mov_metadata(t1, method->method_holder()); // InstanceKlass* -+ __ clinit_barrier(t1, t0, &L_skip_barrier); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ __ reset_last_Java_frame(false); + -+ __ bind(L_skip_barrier); -+ } ++ // Load UnrollBlock* into x15 ++ __ mv(x15, x10); + -+ // Generate stack overflow check -+ __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size())); ++ __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); ++ Label noException; ++ __ li(t0, Deoptimization::Unpack_exception); ++ __ bne(xcpool, t0, noException); // Was exception pending? ++ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); + -+ // Generate a new frame for the wrapper. -+ __ enter(); -+ // -2 because return address is already present and so is saved fp -+ __ sub(sp, sp, stack_size - 2 * wordSize); ++ __ verify_oop(x10); + -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ assert_cond(bs != NULL); -+ bs->nmethod_entry_barrier(masm); ++ // Overwrite the result registers with the exception results. ++ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+ // Frame is now completed as far as size and linkage. -+ int frame_complete = ((intptr_t)__ pc()) - start; ++ __ bind(noException); + -+ // We use x18 as the oop handle for the receiver/klass -+ // It is callee save so it survives the call to native ++ // Only register save data is on the stack. ++ // Now restore the result registers. Everything else is either dead ++ // or captured in the vframeArray. 
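Editorial note on the comment above: after fetch_unroll_info() only the Java return values (integer result in x10, float result in f10) still matter, so the code that follows reads back just those two slots of the register-save area before the whole area is popped; everything else is rebuilt from the vframeArray. A minimal stand-alone sketch of that idea — the 64-slot area and the slot indices kX10Slot/kF10Slot are made up for illustration and are not HotSpot's RegisterSaver layout:

#include <array>
#include <cstdint>
#include <cstdio>

constexpr int kX10Slot = 10;   // made-up slot index for the sketch
constexpr int kF10Slot = 42;   // made-up slot index for the sketch

int main() {
  // Stands in for the area built by save_live_registers(): one word per saved register.
  std::array<std::uint64_t, 64> save_area{};
  save_area[kX10Slot] = 0x1234;                 // integer result (or exception oop) spilled into x10's slot
  save_area[kF10Slot] = 0x3FF0000000000000ULL;  // bit pattern of 1.0 in f10's slot

  std::uint64_t x10 = save_area[kX10Slot];      // mirrors: ld  x10, reg_offset_in_bytes(x10)(sp)
  std::uint64_t f10 = save_area[kF10Slot];      // mirrors: fld f10, freg_offset_in_bytes(f10)(sp)
  std::printf("restored x10=0x%llx, f10 bits=0x%llx\n",
              static_cast<unsigned long long>(x10),
              static_cast<unsigned long long>(f10));
  // The remaining slots are simply discarded when sp is bumped past the save area.
  return 0;
}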
+ -+ const Register oop_handle_reg = x18; ++ // Restore fp result register ++ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ // Restore integer result register ++ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+ // -+ // We immediately shuffle the arguments so that any vm call we have to -+ // make from here on out (sync slow path, jvmti, etc.) we will have -+ // captured the oops from our caller and have a valid oopMap for -+ // them. ++ // Pop all of the register save area off the stack ++ __ add(sp, sp, frame_size_in_words * wordSize); + -+ // ----------------- -+ // The Grand Shuffle ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. + -+ // The Java calling convention is either equal (linux) or denser (win64) than the -+ // c calling convention. However the because of the jni_env argument the c calling -+ // convention always has at least one more (and two for static) arguments than Java. -+ // Therefore if we move the args from java -> c backwards then we will never have -+ // a register->register conflict and we don't have to build a dependency graph -+ // and figure out how to break any cycles. ++ // Pop all the frames we must move/replace. + // -+ -+ // Record esp-based slot for receiver on stack for non-static methods -+ int receiver_offset = -1; -+ -+ // This is a trick. We double the stack slots so we can claim -+ // the oops in the caller's frame. Since we are sure to have -+ // more args than the caller doubling is enough to make -+ // sure we can capture all the incoming oop args from the -+ // caller. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). + // -+ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); -+ assert_cond(map != NULL); ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. + -+ int float_args = 0; -+ int int_args = 0; ++ // Pop deoptimized frame ++ __ lwu(x12, Address(x15, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); ++ __ sub(x12, x12, 2 * wordSize); ++ __ add(sp, sp, x12); ++ __ ld(fp, Address(sp, 0)); ++ __ ld(ra, Address(sp, wordSize)); ++ __ addi(sp, sp, 2 * wordSize); ++ // RA should now be the return address to the caller (3) + +#ifdef ASSERT -+ bool reg_destroyed[RegisterImpl::number_of_registers]; -+ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; -+ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { -+ reg_destroyed[r] = false; -+ } -+ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { -+ freg_destroyed[f] = false; -+ } ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. 
++ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(x9, x12); ++#endif ++ // Load address of array of frame pcs into x12 ++ __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); + -+#endif /* ASSERT */ ++ // Load address of array of frame sizes into x14 ++ __ ld(x14, Address(x15, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); + -+ // For JNI natives the incoming and outgoing registers are offset upwards. -+ GrowableArray arg_order(2 * total_in_args); -+ VMRegPair tmp_vmreg; -+ tmp_vmreg.set2(x9->as_VMReg()); ++ // Load counter into x13 ++ __ lwu(x13, Address(x15, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); + -+ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { -+ arg_order.push(i); -+ arg_order.push(c_arg); -+ } ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. + -+ int temploc = -1; -+ for (int ai = 0; ai < arg_order.length(); ai += 2) { -+ int i = arg_order.at(ai); -+ int c_arg = arg_order.at(ai + 1); -+ __ block_comment(err_msg("mv %d -> %d", i, c_arg)); -+ assert(c_arg != -1 && i != -1, "wrong order"); -+#ifdef ASSERT -+ if (in_regs[i].first()->is_Register()) { -+ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); -+ } else if (in_regs[i].first()->is_FloatRegister()) { -+ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); -+ } -+ if (out_regs[c_arg].first()->is_Register()) { -+ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; -+ } else if (out_regs[c_arg].first()->is_FloatRegister()) { -+ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; -+ } -+#endif /* ASSERT */ -+ switch (in_sig_bt[i]) { -+ case T_ARRAY: -+ case T_OBJECT: -+ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], -+ ((i == 0) && (!is_static)), -+ &receiver_offset); -+ int_args++; -+ break; -+ case T_VOID: -+ break; ++ const Register sender_sp = x16; + -+ case T_FLOAT: -+ float_move(masm, in_regs[i], out_regs[c_arg]); -+ float_args++; -+ break; ++ __ mv(sender_sp, sp); ++ __ lwu(x9, Address(x15, ++ Deoptimization::UnrollBlock:: ++ caller_adjustment_offset_in_bytes())); ++ __ sub(sp, sp, x9); + -+ case T_DOUBLE: -+ assert( i + 1 < total_in_args && -+ in_sig_bt[i + 1] == T_VOID && -+ out_sig_bt[c_arg + 1] == T_VOID, "bad arg list"); -+ double_move(masm, in_regs[i], out_regs[c_arg]); -+ float_args++; -+ break; ++ // Push interpreter frames in a loop ++ __ li(t0, 0xDEADDEAD); // Make a recognizable pattern ++ __ mv(t1, t0); ++ Label loop; ++ __ bind(loop); ++ __ ld(x9, Address(x14, 0)); // Load frame size ++ __ addi(x14, x14, wordSize); ++ __ sub(x9, x9, 2 * wordSize); // We'll push pc and fp by hand ++ __ ld(ra, Address(x12, 0)); // Load pc ++ __ addi(x12, x12, wordSize); ++ __ enter(); // Save old & set new fp ++ __ sub(sp, sp, x9); // Prolog ++ // This value is corrected by layout_activation_impl ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable ++ __ mv(sender_sp, sp); // Pass sender_sp to next frame ++ __ addi(x13, x13, -1); // 
Decrement counter ++ __ bnez(x13, loop); + -+ case T_LONG : -+ long_move(masm, in_regs[i], out_regs[c_arg]); -+ int_args++; -+ break; ++ // Re-push self-frame ++ __ ld(ra, Address(x12)); ++ __ enter(); + -+ case T_ADDRESS: -+ assert(false, "found T_ADDRESS in java args"); -+ break; ++ // Allocate a full sized register save area. We subtract 2 because ++ // enter() just pushed 2 words ++ __ sub(sp, sp, (frame_size_in_words - 2) * wordSize); + -+ default: -+ move32_64(masm, in_regs[i], out_regs[c_arg]); -+ int_args++; -+ } -+ } ++ // Restore frame locals after moving the frame ++ __ fsd(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+ // point c_arg at the first arg that is already loaded in case we -+ // need to spill before we call out -+ int c_arg = total_c_args - total_in_args; ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ // ++ // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) + -+ // Pre-load a static method's oop into c_rarg1. -+ if (method->is_static()) { ++ // Use fp because the frames look interpreted now ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); + -+ // load oop into a register -+ __ movoop(c_rarg1, -+ JNIHandles::make_local(method->method_holder()->java_mirror()), -+ /*immediate*/true); ++ __ mv(c_rarg0, xthread); ++ __ mv(c_rarg1, xcpool); // second arg: exec_mode ++ offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); ++ __ jalr(x1, t0, offset); + -+ // Now handlize the static class mirror it's known not-null. -+ __ sd(c_rarg1, Address(sp, klass_offset)); -+ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ // Set an oopmap for the call site ++ // Use the same PC we used for the last java frame ++ oop_maps->add_gc_map(the_pc - start, ++ new OopMap(frame_size_in_words, 0)); + -+ // Now get the handle -+ __ la(c_rarg1, Address(sp, klass_offset)); -+ // and protect the arg if we must spill -+ c_arg--; -+ } ++ // Clear fp AND pc ++ __ reset_last_Java_frame(true); + -+ // Change state to native (we save the return address in the thread, since it might not -+ // be pushed on the stack when we do a stack traversal). -+ // We use the same pc/oopMap repeatedly when we call out ++ // Collect return values ++ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+ Label native_return; -+ __ set_last_Java_frame(sp, noreg, native_return, t0); ++ // Pop self-frame. 
++ __ leave(); // Epilog + -+ Label dtrace_method_entry, dtrace_method_entry_done; -+ { -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); -+ __ lbu(t0, Address(t0, offset)); -+ __ addw(t0, t0, zr); -+ __ bnez(t0, dtrace_method_entry); -+ __ bind(dtrace_method_entry_done); -+ } ++ // Jump to interpreter ++ __ ret(); + -+ // RedefineClasses() tracing support for obsolete method entry -+ if (log_is_enabled(Trace, redefine, class, obsolete)) { -+ // protect the args we've loaded -+ save_args(masm, total_c_args, c_arg, out_regs); -+ __ mov_metadata(c_rarg1, method()); -+ __ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), -+ xthread, c_rarg1); -+ restore_args(masm, total_c_args, c_arg, out_regs); -+ } ++ // Make sure all code is generated ++ masm->flush(); + -+ // Lock a synchronized method ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ assert(_deopt_blob != NULL, "create deoptimization blob fail!"); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++} + -+ // Register definitions used by locking and unlocking ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} + -+ const Register swap_reg = x10; -+ const Register obj_reg = x9; // Will contain the oop -+ const Register lock_reg = x30; // Address of compiler lock object (BasicLock) -+ const Register old_hdr = x30; // value of old header at unlock time -+ const Register tmp = ra; ++#ifdef COMPILER2 ++//------------------------------generate_uncommon_trap_blob-------------------- ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); + -+ Label slow_path_lock; -+ Label lock_done; ++ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); + -+ if (method->is_synchronized()) { ++ address start = __ pc(); + -+ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ // Push self-frame. We get here with a return address in RA ++ // and sp should be 16 byte aligned ++ // push fp and retaddr by hand ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(ra, Address(sp, wordSize)); ++ __ sd(fp, Address(sp, 0)); ++ // we don't expect an arg reg save area ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ // compiler left unloaded_class_index in j_rarg0 move to where the ++ // runtime expects it. ++ __ addiw(c_rarg1, j_rarg0, 0); + -+ // Get the handle (the 2nd argument) -+ __ mv(oop_handle_reg, c_rarg1); ++ // we need to set the past SP to the stack pointer of the stub frame ++ // and the pc to the address where this runtime call will return ++ // although actually any pc in this code blob will do). ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); + -+ // Get address of the box ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ // ++ // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode) ++ // ++ // n.b. 
3 gp args, 0 fp args, integral return type + -+ __ la(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ __ mv(c_rarg0, xthread); ++ __ mvw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap); ++ int32_t offset = 0; ++ __ la_patchable(t0, ++ RuntimeAddress(CAST_FROM_FN_PTR(address, ++ Deoptimization::uncommon_trap)), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); + -+ // Load the oop from the handle -+ __ ld(obj_reg, Address(oop_handle_reg, 0)); ++ // Set an oopmap for the call site ++ OopMapSet* oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); ++ assert_cond(oop_maps != NULL && map != NULL); + -+ if (!UseHeavyMonitors) { -+ // Load (object->mark() | 1) into swap_reg % x10 -+ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ __ ori(swap_reg, t0, 1); ++ // location of fp is known implicitly by the frame sender code + -+ // Save (object->mark() | 1) into BasicLock's displaced header -+ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++ oop_maps->add_gc_map(__ pc() - start, map); + -+ // src -> dest if dest == x10 else x10 <- dest -+ { -+ Label here; -+ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); -+ } ++ __ reset_last_Java_frame(false); + -+ // Test if the oopMark is an obvious stack pointer, i.e., -+ // 1) (mark & 3) == 0, and -+ // 2) sp <= mark < mark + os::pagesize() -+ // These 3 tests can be done by evaluating the following -+ // expression: ((mark - sp) & (3 - os::vm_page_size())), -+ // assuming both stack pointer and pagesize have their -+ // least significant 2 bits clear. -+ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg -+ -+ __ sub(swap_reg, swap_reg, sp); -+ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); -+ -+ // Save the test result, for recursive case, the result is zero -+ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -+ __ bnez(swap_reg, slow_path_lock); -+ } else { -+ __ j(slow_path_lock); -+ } ++ // move UnrollBlock* into x14 ++ __ mv(x14, x10); + -+ // Slow path will re-enter here -+ __ bind(lock_done); ++#ifdef ASSERT ++ { Label L; ++ __ lwu(t0, Address(x14, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); ++ __ mvw(t1, Deoptimization::Unpack_uncommon_trap); ++ __ beq(t0, t1, L); ++ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); ++ __ bind(L); + } ++#endif + ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). + -+ // Finally just about ready to make the JNI call ++ __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog! 
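Editorial note on the epilog above: the frame size is counted in 32-bit VMReg stack slots, so it is converted to a byte count with "<< LogBytesPerInt" before being added to sp. A small arithmetic sketch of that conversion; the slot count of 4 and the shift value of 2 are assumptions for illustration, not values read from the HotSpot headers:

#include <cassert>

int main() {
  const int LogBytesPerInt = 2;       // one 32-bit stack slot = 4 bytes (assumed)
  const int framesize_in_slots = 4;   // assumed: saved fp + ra on RV64 = 2 words = 4 slots
  const int framesize_in_bytes = framesize_in_slots << LogBytesPerInt;

  assert(framesize_in_bytes == 16);       // matches the 2 * wordSize pushed by hand at blob entry
  assert(framesize_in_bytes % 16 == 0);   // sp stays 16-byte aligned, as asserted at the top of the blob
  return 0;
}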
+ -+ // get JNIEnv* which is first argument to native -+ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); ++ // Pop deoptimized frame (int) ++ __ lwu(x12, Address(x14, ++ Deoptimization::UnrollBlock:: ++ size_of_deoptimized_frame_offset_in_bytes())); ++ __ sub(x12, x12, 2 * wordSize); ++ __ add(sp, sp, x12); ++ __ ld(fp, sp, 0); ++ __ ld(ra, sp, wordSize); ++ __ addi(sp, sp, 2 * wordSize); ++ // RA should now be the return address to the caller (3) frame + -+ // Now set thread in native -+ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); -+ __ mv(t0, _thread_in_native); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sw(t0, Address(t1)); ++#ifdef ASSERT ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. ++ __ lwu(x11, Address(x14, ++ Deoptimization::UnrollBlock:: ++ total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(x11, x12); ++#endif + -+ rt_call(masm, native_func); ++ // Load address of array of frame pcs into x12 (address*) ++ __ ld(x12, Address(x14, ++ Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); + -+ __ bind(native_return); ++ // Load address of array of frame sizes into x15 (intptr_t*) ++ __ ld(x15, Address(x14, ++ Deoptimization::UnrollBlock:: ++ frame_sizes_offset_in_bytes())); + -+ intptr_t return_pc = (intptr_t) __ pc(); -+ oop_maps->add_gc_map(return_pc - start, map); ++ // Counter ++ __ lwu(x13, Address(x14, ++ Deoptimization::UnrollBlock:: ++ number_of_frames_offset_in_bytes())); // (int) + -+ // Unpack native results. -+ if (ret_type != T_OBJECT && ret_type != T_ARRAY) { -+ __ cast_primitive_type(ret_type, x10); -+ } ++ // Now adjust the caller's stack to make up for the extra locals but ++ // record the original sp so that we can save it in the skeletal ++ // interpreter frame and the stack walking of interpreter_sender ++ // will get the unextended sp value and not the "real" sp value. + -+ Label safepoint_in_progress, safepoint_in_progress_done; -+ Label after_transition; ++ const Register sender_sp = t1; // temporary register + -+ // Switch thread to "native transition" state before reading the synchronization state. -+ // This additional state is necessary because reading and testing the synchronization -+ // state is not atomic w.r.t. GC, as this scenario demonstrates: -+ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. -+ // VM thread changes sync state to synchronizing and suspends threads for GC. -+ // Thread A is resumed to finish this native method, but doesn't block here since it -+ // didn't see any synchronization is progress, and escapes. 
-+ __ mv(t0, _thread_in_native_trans); ++ __ lwu(x11, Address(x14, ++ Deoptimization::UnrollBlock:: ++ caller_adjustment_offset_in_bytes())); // (int) ++ __ mv(sender_sp, sp); ++ __ sub(sp, sp, x11); + -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld(x11, Address(x15, 0)); // Load frame size ++ __ sub(x11, x11, 2 * wordSize); // We'll push pc and fp by hand ++ __ ld(ra, Address(x12, 0)); // Save return address ++ __ enter(); // and old fp & set new fp ++ __ sub(sp, sp, x11); // Prolog ++ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable ++ // This value is corrected by layout_activation_impl ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ mv(sender_sp, sp); // Pass sender_sp to next frame ++ __ add(x15, x15, wordSize); // Bump array pointer (sizes) ++ __ add(x12, x12, wordSize); // Bump array pointer (pcs) ++ __ subw(x13, x13, 1); // Decrement counter ++ __ bgtz(x13, loop); ++ __ ld(ra, Address(x12, 0)); // save final return address ++ // Re-push self-frame ++ __ enter(); // & old fp & set new fp + -+ // Force this write out before the read below -+ __ membar(MacroAssembler::AnyAny); ++ // Use fp because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); + -+ // check for safepoint operation in progress and/or pending suspend requests -+ { -+ // We need an acquire here to ensure that any subsequent load of the -+ // global SafepointSynchronize::_state flag is ordered after this load -+ // of the thread-local polling word. We don't want this poll to -+ // return false (i.e. not safepointing) and a later poll of the global -+ // SafepointSynchronize::_state spuriously to return true. -+ // This is to avoid a race when we're in a native->Java transition -+ // racing the code which wakes up from a safepoint. -+ -+ __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */); -+ __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); -+ __ bnez(t0, safepoint_in_progress); -+ __ bind(safepoint_in_progress_done); -+ } ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ // ++ // BasicType unpack_frames(JavaThread* thread, int exec_mode) ++ // + -+ // change thread state -+ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); -+ __ mv(t0, _thread_in_Java); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sw(t0, Address(t1)); -+ __ bind(after_transition); -+ -+ Label reguard; -+ Label reguard_done; -+ __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); -+ __ mv(t1, StackOverflow::stack_guard_yellow_reserved_disabled); -+ __ beq(t0, t1, reguard); -+ __ bind(reguard_done); ++ // n.b. 
2 gp args, 0 fp args, integral return type + -+ // native result if any is live ++ // sp should already be aligned ++ __ mv(c_rarg0, xthread); ++ __ mvw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap); ++ offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); ++ __ jalr(x1, t0, offset); + -+ // Unlock -+ Label unlock_done; -+ Label slow_path_unlock; -+ if (method->is_synchronized()) { ++ // Set an oopmap for the call site ++ // Use the same PC we used for the last java frame ++ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); + -+ // Get locked oop from the handle we passed to jni -+ __ ld(obj_reg, Address(oop_handle_reg, 0)); ++ // Clear fp AND pc ++ __ reset_last_Java_frame(true); + -+ Label done; ++ // Pop self-frame. ++ __ leave(); // Epilog + -+ if (!UseHeavyMonitors) { -+ // Simple recursive lock? -+ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ __ beqz(t0, done); -+ } ++ // Jump to interpreter ++ __ ret(); + ++ // Make sure all code is generated ++ masm->flush(); + -+ // Must save x10 if if it is live now because cmpxchg must use it -+ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { -+ save_native_result(masm, ret_type, stack_slots); -+ } ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, ++ SimpleRuntimeFrame::framesize >> 1); ++} ++#endif // COMPILER2 + -+ if (!UseHeavyMonitors) { -+ // get address of the stack lock -+ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ // get old displaced header -+ __ ld(old_hdr, Address(x10, 0)); ++//------------------------------generate_handler_blob------ ++// ++// Generate a special Compile2Runtime blob that saves all registers, ++// and setup oopmap. ++// ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ OopMap* map = NULL; + -+ // Atomic swap old header if oop still contains the stack lock -+ Label succeed; -+ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); -+ __ bind(succeed); -+ } else { -+ __ j(slow_path_unlock); -+ } ++ // Allocate space for the code. Setup code generation tools. ++ CodeBuffer buffer("handler_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); + -+ // slow path re-enters here -+ __ bind(unlock_done); -+ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { -+ restore_native_result(masm, ret_type, stack_slots); -+ } ++ address start = __ pc(); ++ address call_pc = NULL; ++ int frame_size_in_words = -1; ++ bool cause_return = (poll_type == POLL_AT_RETURN); ++ RegisterSaver reg_saver; + -+ __ bind(done); -+ } ++ // Save Integer and Float registers. ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ Label dtrace_method_exit, dtrace_method_exit_done; -+ { -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); -+ __ lbu(t0, Address(t0, offset)); -+ __ bnez(t0, dtrace_method_exit); -+ __ bind(dtrace_method_exit_done); -+ } ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselves. 
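Editorial note on the comment above: the reason for hand-rolling the call rather than using call_VM is that the oop map must be registered against the exact return address of that call — the runtime later looks the map up by pc offset within the blob when it walks the stack. A rough stand-alone illustration of that bookkeeping; OopMapSketch, record_call_site and the std::map are inventions of this note, not HotSpot's OopMapSet API:

#include <cstdint>
#include <map>

struct OopMapSketch {
  // would describe which saved slots/registers hold oops; left empty in this sketch
};

// Keyed by (return_pc - blob_start), mirroring oop_maps->add_gc_map(__ pc() - start, map).
std::map<std::uintptr_t, OopMapSketch> gc_maps;

void record_call_site(std::uintptr_t blob_start, std::uintptr_t return_pc) {
  gc_maps.emplace(return_pc - blob_start, OopMapSketch{});
}

int main() {
  record_call_site(0x1000, 0x1040);          // dummy addresses; the map is found again by this exact offset
  return gc_maps.count(0x40) == 1 ? 0 : 1;   // lookup succeeds only at the recorded pc offset
}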
+ -+ __ reset_last_Java_frame(false); ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); + -+ // Unbox oop result, e.g. JNIHandles::resolve result. -+ if (is_reference_type(ret_type)) { -+ __ resolve_jobject(x10, xthread, t1); -+ } ++ // The return address must always be correct so that frame constructor never ++ // sees an invalid pc. + -+ if (CheckJNICalls) { -+ // clear_pending_jni_exception_check -+ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, x18 is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld(x18, Address(xthread, JavaThread::saved_exception_pc_offset())); ++ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); + } + -+ // reset handle block -+ __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); -+ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); -+ -+ __ leave(); ++ // Do the call ++ __ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(call_ptr), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); + -+ // Any exception pending? -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ bnez(t0, exception_pending); ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. + -+ // We're done -+ __ ret(); ++ oop_maps->add_gc_map( __ pc() - start, map); + -+ // Unexpected paths are out of line and go here ++ Label noException; + -+ // forward the exception -+ __ bind(exception_pending); ++ __ reset_last_Java_frame(false); + -+ // and forward the exception -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + -+ // Slow path locking & unlocking -+ if (method->is_synchronized()) { ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, noException); + -+ __ block_comment("Slow path lock {"); -+ __ bind(slow_path_lock); ++ // Exception pending + -+ // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM -+ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ reg_saver.restore_live_registers(masm); + -+ // protect the args we've loaded -+ save_args(masm, total_c_args, c_arg, out_regs); ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + -+ __ mv(c_rarg0, obj_reg); -+ __ mv(c_rarg1, lock_reg); -+ __ mv(c_rarg2, xthread); ++ // No exception case ++ __ bind(noException); + -+ // Not a leaf but we have last_Java_frame setup as we want -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); -+ restore_args(masm, total_c_args, c_arg, out_regs); ++ Label no_adjust, bail; ++ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); ++ __ bne(x18, t0, no_adjust); + +#ifdef ASSERT -+ { Label L; -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ beqz(t0, L); -+ __ stop("no pending exception allowed on exit from monitorenter"); -+ __ bind(L); -+ } ++ // Verify the correct encoding of the poll we're about to skip. ++ // See NativeInstruction::is_lwu_to_zr() ++ __ lwu(t0, Address(x18)); ++ __ andi(t1, t0, 0b0000011); ++ __ mv(t2, 0b0000011); ++ __ bne(t1, t2, bail); // 0-6:0b0000011 ++ __ srli(t1, t0, 7); ++ __ andi(t1, t1, 0b00000); ++ __ bnez(t1, bail); // 7-11:0b00000 ++ __ srli(t1, t0, 12); ++ __ andi(t1, t1, 0b110); ++ __ mv(t2, 0b110); ++ __ bne(t1, t2, bail); // 12-14:0b110 +#endif -+ __ j(lock_done); ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ add(x18, x18, NativeInstruction::instruction_size); ++ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); ++ } + -+ __ block_comment("} Slow path lock"); ++ __ bind(no_adjust); ++ // Normal exit, restore registers and exit. + -+ __ block_comment("Slow path unlock {"); -+ __ bind(slow_path_unlock); ++ reg_saver.restore_live_registers(masm); ++ __ ret(); + -+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { -+ save_native_result(masm, ret_type, stack_slots); -+ } ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif + -+ __ mv(c_rarg2, xthread); -+ __ la(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ __ mv(c_rarg0, obj_reg); ++ // Make sure all code is generated ++ masm->flush(); + -+ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) -+ // NOTE that obj_reg == x9 currently -+ __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} + -+ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. 
++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + -+#ifdef ASSERT -+ { -+ Label L; -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ beqz(t0, L); -+ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); -+ __ bind(L); -+ } -+#endif /* ASSERT */ ++ // allocate space for the code ++ ResourceMark rm; + -+ __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ CodeBuffer buffer(name, 1000, 512); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); + -+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { -+ restore_native_result(masm, ret_type, stack_slots); -+ } -+ __ j(unlock_done); ++ int frame_size_in_words = -1; ++ RegisterSaver reg_saver; + -+ __ block_comment("} Slow path unlock"); ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ OopMap* map = NULL; + -+ } // synchronized ++ int start = __ offset(); + -+ // SLOW PATH Reguard the stack if needed ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ __ bind(reguard); -+ save_native_result(masm, ret_type, stack_slots); -+ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); -+ restore_native_result(masm, ret_type, stack_slots); -+ // and continue -+ __ j(reguard_done); ++ int frame_complete = __ offset(); + -+ // SLOW PATH safepoint + { -+ __ block_comment("safepoint {"); -+ __ bind(safepoint_in_progress); ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); + -+ // Don't use call_VM as it will see a possible pending exception and forward it -+ // and never return here preventing us from clearing _last_native_pc down below. -+ // -+ save_native_result(masm, ret_type, stack_slots); + __ mv(c_rarg0, xthread); -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif + int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); ++ __ la_patchable(t0, RuntimeAddress(destination), offset); + __ jalr(x1, t0, offset); ++ __ bind(retaddr); ++ } + -+ // Restore any method result value -+ restore_native_result(masm, ret_type, stack_slots); ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. + -+ __ j(safepoint_in_progress_done); -+ __ block_comment("} safepoint"); -+ } ++ oop_maps->add_gc_map( __ offset() - start, map); + -+ // SLOW PATH dtrace support -+ { -+ __ block_comment("dtrace entry {"); -+ __ bind(dtrace_method_entry); ++ // x10 contains the address we are going to jump to assuming no exception got installed + -+ // We have all of the arguments setup at this point. We must not touch any register -+ // argument registers at this point (what if we save/restore them there are no oop? 
++ // clear last_Java_sp ++ __ reset_last_Java_frame(false); ++ // check for pending exceptions ++ Label pending; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ bnez(t0, pending); + -+ save_args(masm, total_c_args, c_arg, out_regs); -+ __ mov_metadata(c_rarg1, method()); -+ __ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), -+ xthread, c_rarg1); -+ restore_args(masm, total_c_args, c_arg, out_regs); -+ __ j(dtrace_method_entry_done); -+ __ block_comment("} dtrace entry"); -+ } ++ // get the returned Method* ++ __ get_vm_result_2(xmethod, xthread); ++ __ sd(xmethod, Address(sp, reg_saver.reg_offset_in_bytes(xmethod))); + -+ { -+ __ block_comment("dtrace exit {"); -+ __ bind(dtrace_method_exit); -+ save_native_result(masm, ret_type, stack_slots); -+ __ mov_metadata(c_rarg1, method()); -+ __ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), -+ xthread, c_rarg1); -+ restore_native_result(masm, ret_type, stack_slots); -+ __ j(dtrace_method_exit_done); -+ __ block_comment("} dtrace exit"); -+ } ++ // x10 is where we want to jump, overwrite t0 which is saved and temporary ++ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(t0))); ++ reg_saver.restore_live_registers(masm); + -+ __ flush(); ++ // We are back the the original state on entry and ready to go. + -+ nmethod *nm = nmethod::new_native_nmethod(method, -+ compile_id, -+ masm->code(), -+ vep_offset, -+ frame_complete, -+ stack_slots / VMRegImpl::slots_per_word, -+ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), -+ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), -+ oop_maps); -+ assert(nm != NULL, "create native nmethod fail!"); -+ return nm; -+} ++ __ jr(t0); + -+// this function returns the adjust size (in number of words) to a c2i adapter -+// activation for use during deoptimization -+int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { -+ assert(callee_locals >= callee_parameters, -+ "test and remove; got more parms than locals"); -+ if (callee_locals < callee_parameters) { -+ return 0; // No adjustment for negative locals -+ } -+ int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords; -+ // diff is counted in stack words -+ return align_up(diff, 2); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ reg_saver.restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ ++ __ sd(zr, Address(xthread, JavaThread::vm_result_offset())); ++ ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ ++ // return the blob ++ return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); +} + -+//------------------------------generate_deopt_blob---------------------------- -+void SharedRuntime::generate_deopt_blob() { ++#ifdef COMPILER2 ++//------------------------------generate_exception_blob--------------------------- ++// creates exception blob at the end ++// Using exception blob, this code is jumped from a compiled method. ++// (see emit_exception_handler in riscv.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. 
callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jmp. ++// ++// Arguments: ++// x10: exception oop ++// x13: exception pc ++// ++// Results: ++// x10: exception oop ++// x13: exception pc in caller ++// destination: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// Registers x10, x13, x12, x14, x15, t0 are not callee saved. ++// ++ ++void OptoRuntime::generate_exception_blob() { ++ assert(!OptoRuntime::is_callee_saved_register(R13_num), ""); ++ assert(!OptoRuntime::is_callee_saved_register(R10_num), ""); ++ assert(!OptoRuntime::is_callee_saved_register(R12_num), ""); ++ ++ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); ++ + // Allocate space for the code + ResourceMark rm; + // Setup code generation tools -+ int pad = 0; -+ CodeBuffer buffer("deopt_blob", 2048 + pad, 1024); ++ CodeBuffer buffer("exception_blob", 2048, 1024); + MacroAssembler* masm = new MacroAssembler(&buffer); -+ int frame_size_in_words = -1; -+ OopMap* map = NULL; -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(masm != NULL && oop_maps != NULL); -+ RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0); ++ assert_cond(masm != NULL); + -+ // ------------- -+ // This code enters when returning to a de-optimized nmethod. A return -+ // address has been pushed on the the stack, and return values are in -+ // registers. -+ // If we are doing a normal deopt then we were called from the patched -+ // nmethod from the point we returned to the nmethod. So the return -+ // address on the stack is wrong by NativeCall::instruction_size -+ // We will adjust the value so it looks like we have the original return -+ // address on the stack (like when we eagerly deoptimized). -+ // In the case of an exception pending when deoptimizing, we enter -+ // with a return address on the stack that points after the call we patched -+ // into the exception handler. We have the following register state from, -+ // e.g., the forward exception stub (see stubGenerator_riscv.cpp). -+ // x10: exception oop -+ // x9: exception handler -+ // x13: throwing pc -+ // So in this case we simply jam x13 into the useless return address and -+ // the stack looks just like we want. ++ // TODO check various assumptions made here + // -+ // At this point we need to de-opt. We save the argument return -+ // registers. We call the first C routine, fetch_unroll_info(). This -+ // routine captures the return values and returns a structure which -+ // describes the current frame size and the sizes of all replacement frames. -+ // The current frame is compiled code and may contain many inlined -+ // functions, each with their own JVM state. We pop the current frame, then -+ // push all the new frames. Then we call the C routine unpack_frames() to -+ // populate these frames. Finally unpack_frames() returns us the new target -+ // address. Notice that callee-save registers are BLOWN here; they have -+ // already been captured in the vframeArray at the time the return PC was -+ // patched. ++ // make sure we do so before running this ++ + address start = __ pc(); -+ Label cont; + -+ // Prolog for non exception case! 
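Editorial note before the hand-built prolog that follows: the exception pc arrives in ra and is stored where a return address would normally sit, so the stack walker treats the throwing frame as if it had made an ordinary call into this blob. A sketch of the two-word frame link created by "addi sp, sp, -2 * wordSize" plus the two stores; FrameLink and the dummy values are illustrative assumptions, not HotSpot structures:

#include <cstddef>
#include <cstdint>
#include <cstdio>

// What sits at the new sp after the prolog below.
struct FrameLink {
  std::uint64_t saved_fp;      // sd fp, 0(sp)
  std::uint64_t exception_pc;  // sd ra, wordSize(sp) -- ra holds the exception pc on entry
};

int main() {
  FrameLink link{0xCAFE0000u, 0xDEAD0000u};  // dummy values
  std::printf("fp slot at sp+%zu, exception pc (acting as return address) at sp+%zu, value 0x%llx\n",
              offsetof(FrameLink, saved_fp), offsetof(FrameLink, exception_pc),
              static_cast<unsigned long long>(link.exception_pc));
  return 0;
}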
++ // push fp and retaddr by hand ++ // Exception pc is 'return address' for stack walker ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(ra, Address(sp, wordSize)); ++ __ sd(fp, Address(sp)); ++ // there are no callee save registers and we don't expect an ++ // arg reg save area ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); + -+ // Save everything in sight. -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. ++ // ++ // address OptoRuntime::handle_exception_C(JavaThread* thread) ++ // ++ // n.b. 1 gp arg, 0 fp args, integral return type + -+ // Normal deoptimization. Save exec mode for unpack_frames. -+ __ mvw(xcpool, Deoptimization::Unpack_deopt); // callee-saved -+ __ j(cont); ++ // the stack should always be aligned ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, noreg, the_pc, t0); ++ __ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); ++ __ jalr(x1, t0, offset); + -+ int reexecute_offset = __ pc() - start; + -+ // Reexecute case -+ // return address is the pc describes what bci to do re-execute at ++ // handle_exception_C is a special VM call which does not require an explicit ++ // instruction sync afterwards. + -+ // No need to update map as each call to save_live_registers will produce identical oopmap -+ (void) reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ // Set an oopmap for the call site. This oopmap will only be used if we ++ // are unwinding the stack. Hence, all locations will be dead. ++ // Callee-saved registers will be the same as the frame above (i.e., ++ // handle_exception_stub), since they were restored when we got the ++ // exception. + -+ __ mvw(xcpool, Deoptimization::Unpack_reexecute); // callee-saved -+ __ j(cont); ++ OopMapSet* oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); + -+ int exception_offset = __ pc() - start; ++ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); + -+ // Prolog for exception case ++ __ reset_last_Java_frame(false); + -+ // all registers are dead at this entry point, except for x10, and -+ // x13 which contain the exception oop and exception pc -+ // respectively. Set them in TLS and fall thru to the -+ // unpack_with_exception_in_tls entry point. ++ // Restore callee-saved registers + -+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ // fp is an implicitly saved callee saved register (i.e. the calling ++ // convention will save restore it in prolog/epilog) Other than that ++ // there are no callee save registers now that adapter frames are gone. 
++ // and we dont' expect an arg reg save area ++ __ ld(fp, Address(sp)); ++ __ ld(x13, Address(sp, wordSize)); ++ __ addi(sp, sp , 2 * wordSize); + -+ int exception_in_tls_offset = __ pc() - start; -+ -+ // new implementation because exception oop is now passed in JavaThread ++ // x10: exception handler + -+ // Prolog for exception case -+ // All registers must be preserved because they might be used by LinearScan -+ // Exceptiop oop and throwing PC are passed in JavaThread -+ // tos: stack at point of call to method that threw the exception (i.e. only -+ // args are on the stack, no return address) ++ // We have a handler in x10 (could be deopt blob). ++ __ mv(t0, x10); + -+ // The return address pushed by save_live_registers will be patched -+ // later with the throwing pc. The correct value is not available -+ // now because loading it from memory would destroy registers. ++ // Get the exception oop ++ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld(x14, Address(xthread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ sd(zr, Address(xthread, JavaThread::exception_handler_pc_offset())); ++ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. ++ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); + -+ // NB: The SP at this point must be the SP of the method that is -+ // being deoptimized. Deoptimization assumes that the frame created -+ // here by save_live_registers is immediately below the method's SP. -+ // This is a somewhat fragile mechanism. ++ // x10: exception oop ++ // t0: exception handler ++ // x14: exception pc ++ // Jump to handler + -+ // Save everything in sight. -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ __ jr(t0); + -+ // Now it is safe to overwrite any register ++ // Make sure all code is generated ++ masm->flush(); + -+ // Deopt during an exception. Save exec mode for unpack_frames. -+ __ li(xcpool, Deoptimization::Unpack_exception); // callee-saved ++ // Set exception blob ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); ++} ++#endif // COMPILER2 +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +new file mode 100644 +index 0000000000..9970229c5c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -0,0 +1,3743 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // load throwing pc from JavaThread and patch it as the return address -+ // of the current frame. Then clear the field in JavaThread ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "utilities/align.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++#if INCLUDE_ZGC ++#include "gc/z/zThreadLocalData.hpp" ++#endif + -+ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ __ sd(x13, Address(fp, frame::return_addr_offset * wordSize)); -+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp + -+#ifdef ASSERT -+ // verify that there is really an exception oop in JavaThread -+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ verify_oop(x10); ++#undef __ ++#define __ _masm-> + -+ // verify that there is no pending exception -+ Label no_pending_exception; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, no_pending_exception); -+ __ stop("must not have pending exception here"); -+ __ bind(no_pending_exception); ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + -+ __ bind(cont); ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + -+ // Call C code. Need thread and this frame, but NOT official VM entry -+ // crud. We cannot block on this call, no GC can happen. -+ // -+ // UnrollBlock* fetch_unroll_info(JavaThread* thread) ++// Stub Code definitions + -+ // fetch_unroll_info needs to call last_java_frame(). 
++class StubGenerator: public StubCodeGenerator { ++ private: + -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); -+#ifdef ASSERT -+ { -+ Label L; -+ __ ld(t0, Address(xthread, -+ JavaThread::last_Java_fp_offset())); -+ __ beqz(t0, L); -+ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); -+ __ bind(L); ++#ifdef PRODUCT ++#define inc_counter_np(counter) ((void)0) ++#else ++ void inc_counter_np_(int& counter) { ++ __ la(t1, ExternalAddress((address)&counter)); ++ __ lwu(t0, Address(t1, 0)); ++ __ addiw(t0, t0, 1); ++ __ sw(t0, Address(t1, 0)); + } -+#endif // ASSERT -+ __ mv(c_rarg0, xthread); -+ __ mv(c_rarg1, xcpool); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); ++#define inc_counter_np(counter) \ ++ BLOCK_COMMENT("inc_counter " #counter); \ ++ inc_counter_np_(counter); ++#endif + -+ // Need to have an oopmap that tells fetch_unroll_info where to -+ // find any register it might need. -+ oop_maps->add_gc_map(__ pc() - start, map); ++ // Call stubs are used to call Java from C ++ // ++ // Arguments: ++ // c_rarg0: call wrapper address address ++ // c_rarg1: result address ++ // c_rarg2: result type BasicType ++ // c_rarg3: method Method* ++ // c_rarg4: (interpreter) entry point address ++ // c_rarg5: parameters intptr_t* ++ // c_rarg6: parameter size (in words) int ++ // c_rarg7: thread Thread* ++ // ++ // There is no return from the stub itself as any Java result ++ // is written to result ++ // ++ // we save x1 (ra) as the return PC at the base of the frame and ++ // link x8 (fp) below it as the frame pointer installing sp (x2) ++ // into fp. ++ // ++ // we save x10-x17, which accounts for all the c arguments. ++ // ++ // TODO: strictly do we need to save them all? they are treated as ++ // volatile by C so could we omit saving the ones we are going to ++ // place in global registers (thread? method?) or those we only use ++ // during setup of the Java call? ++ // ++ // we don't need to save x5 which C uses as an indirect result location ++ // return register. ++ // ++ // we don't need to save x6-x7 and x28-x31 which both C and Java treat as ++ // volatile ++ // ++ // we save x9, x18-x27, f8-f9, and f18-f27 which Java uses as temporary ++ // registers and C expects to be callee-save ++ // ++ // so the stub frame looks like this when we enter Java code ++ // ++ // [ return_from_Java ] <--- sp ++ // [ argument word n ] ++ // ... 
++ // -34 [ argument word 1 ] ++ // -33 [ saved f27 ] <--- sp_after_call ++ // -32 [ saved f26 ] ++ // -31 [ saved f25 ] ++ // -30 [ saved f24 ] ++ // -29 [ saved f23 ] ++ // -28 [ saved f22 ] ++ // -27 [ saved f21 ] ++ // -26 [ saved f20 ] ++ // -25 [ saved f19 ] ++ // -24 [ saved f18 ] ++ // -23 [ saved f9 ] ++ // -22 [ saved f8 ] ++ // -21 [ saved x27 ] ++ // -20 [ saved x26 ] ++ // -19 [ saved x25 ] ++ // -18 [ saved x24 ] ++ // -17 [ saved x23 ] ++ // -16 [ saved x22 ] ++ // -15 [ saved x21 ] ++ // -14 [ saved x20 ] ++ // -13 [ saved x19 ] ++ // -12 [ saved x18 ] ++ // -11 [ saved x9 ] ++ // -10 [ call wrapper (x10) ] ++ // -9 [ result (x11) ] ++ // -8 [ result type (x12) ] ++ // -7 [ method (x13) ] ++ // -6 [ entry point (x14) ] ++ // -5 [ parameters (x15) ] ++ // -4 [ parameter size (x16) ] ++ // -3 [ thread (x17) ] ++ // -2 [ saved fp (x8) ] ++ // -1 [ saved ra (x1) ] ++ // 0 [ ] <--- fp == saved sp (x2) + -+ __ reset_last_Java_frame(false); ++ // Call stub stack layout word offsets from fp ++ enum call_stub_layout { ++ sp_after_call_off = -33, + -+ // Load UnrollBlock* into x15 -+ __ mv(x15, x10); ++ f27_off = -33, ++ f26_off = -32, ++ f25_off = -31, ++ f24_off = -30, ++ f23_off = -29, ++ f22_off = -28, ++ f21_off = -27, ++ f20_off = -26, ++ f19_off = -25, ++ f18_off = -24, ++ f9_off = -23, ++ f8_off = -22, + -+ __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); -+ Label noException; -+ __ li(t0, Deoptimization::Unpack_exception); -+ __ bne(xcpool, t0, noException); // Was exception pending? -+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); -+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); ++ x27_off = -21, ++ x26_off = -20, ++ x25_off = -19, ++ x24_off = -18, ++ x23_off = -17, ++ x22_off = -16, ++ x21_off = -15, ++ x20_off = -14, ++ x19_off = -13, ++ x18_off = -12, ++ x9_off = -11, + -+ __ verify_oop(x10); ++ call_wrapper_off = -10, ++ result_off = -9, ++ result_type_off = -8, ++ method_off = -7, ++ entry_point_off = -6, ++ parameters_off = -5, ++ parameter_size_off = -4, ++ thread_off = -3, ++ fp_f = -2, ++ retaddr_off = -1, ++ }; + -+ // Overwrite the result registers with the exception results. -+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ address generate_call_stub(address& return_address) { ++ assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 && ++ (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, ++ "adjust this code"); + -+ __ bind(noException); ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); + -+ // Only register save data is on the stack. -+ // Now restore the result registers. Everything else is either dead -+ // or captured in the vframeArray. 
++ const Address sp_after_call (fp, sp_after_call_off * wordSize); + -+ // Restore fp result register -+ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); -+ // Restore integer result register -+ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ const Address call_wrapper (fp, call_wrapper_off * wordSize); ++ const Address result (fp, result_off * wordSize); ++ const Address result_type (fp, result_type_off * wordSize); ++ const Address method (fp, method_off * wordSize); ++ const Address entry_point (fp, entry_point_off * wordSize); ++ const Address parameters (fp, parameters_off * wordSize); ++ const Address parameter_size(fp, parameter_size_off * wordSize); + -+ // Pop all of the register save area off the stack -+ __ add(sp, sp, frame_size_in_words * wordSize); ++ const Address thread (fp, thread_off * wordSize); + -+ // All of the register save area has been popped of the stack. Only the -+ // return address remains. ++ const Address f27_save (fp, f27_off * wordSize); ++ const Address f26_save (fp, f26_off * wordSize); ++ const Address f25_save (fp, f25_off * wordSize); ++ const Address f24_save (fp, f24_off * wordSize); ++ const Address f23_save (fp, f23_off * wordSize); ++ const Address f22_save (fp, f22_off * wordSize); ++ const Address f21_save (fp, f21_off * wordSize); ++ const Address f20_save (fp, f20_off * wordSize); ++ const Address f19_save (fp, f19_off * wordSize); ++ const Address f18_save (fp, f18_off * wordSize); ++ const Address f9_save (fp, f9_off * wordSize); ++ const Address f8_save (fp, f8_off * wordSize); + -+ // Pop all the frames we must move/replace. -+ // -+ // Frame picture (youngest to oldest) -+ // 1: self-frame (no frame link) -+ // 2: deopting frame (no frame link) -+ // 3: caller of deopting frame (could be compiled/interpreted). -+ // -+ // Note: by leaving the return address of self-frame on the stack -+ // and using the size of frame 2 to adjust the stack -+ // when we are done the return to frame 3 will still be on the stack. ++ const Address x27_save (fp, x27_off * wordSize); ++ const Address x26_save (fp, x26_off * wordSize); ++ const Address x25_save (fp, x25_off * wordSize); ++ const Address x24_save (fp, x24_off * wordSize); ++ const Address x23_save (fp, x23_off * wordSize); ++ const Address x22_save (fp, x22_off * wordSize); ++ const Address x21_save (fp, x21_off * wordSize); ++ const Address x20_save (fp, x20_off * wordSize); ++ const Address x19_save (fp, x19_off * wordSize); ++ const Address x18_save (fp, x18_off * wordSize); + -+ // Pop deoptimized frame -+ __ lwu(x12, Address(x15, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); -+ __ sub(x12, x12, 2 * wordSize); -+ __ add(sp, sp, x12); -+ __ ld(fp, Address(sp, 0)); -+ __ ld(ra, Address(sp, wordSize)); -+ __ addi(sp, sp, 2 * wordSize); -+ // RA should now be the return address to the caller (3) ++ const Address x9_save (fp, x9_off * wordSize); + -+#ifdef ASSERT -+ // Compilers generate code that bang the stack by as much as the -+ // interpreter would need. So this stack banging should never -+ // trigger a fault. Verify that it does not on non product builds. 
-+ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); -+ __ bang_stack_size(x9, x12); -+#endif -+ // Load address of array of frame pcs into x12 -+ __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); ++ // stub code + -+ // Load address of array of frame sizes into x14 -+ __ ld(x14, Address(x15, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); ++ address riscv_entry = __ pc(); + -+ // Load counter into x13 -+ __ lwu(x13, Address(x15, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); ++ // set up frame and move sp to end of save area ++ __ enter(); ++ __ addi(sp, fp, sp_after_call_off * wordSize); + -+ // Now adjust the caller's stack to make up for the extra locals -+ // but record the original sp so that we can save it in the skeletal interpreter -+ // frame and the stack walking of interpreter_sender will get the unextended sp -+ // value and not the "real" sp value. ++ // save register parameters and Java temporary/global registers ++ // n.b. we save thread even though it gets installed in ++ // xthread because we want to sanity check tp later ++ __ sd(c_rarg7, thread); ++ __ sw(c_rarg6, parameter_size); ++ __ sd(c_rarg5, parameters); ++ __ sd(c_rarg4, entry_point); ++ __ sd(c_rarg3, method); ++ __ sd(c_rarg2, result_type); ++ __ sd(c_rarg1, result); ++ __ sd(c_rarg0, call_wrapper); + -+ const Register sender_sp = x16; ++ __ sd(x9, x9_save); + -+ __ mv(sender_sp, sp); -+ __ lwu(x9, Address(x15, -+ Deoptimization::UnrollBlock:: -+ caller_adjustment_offset_in_bytes())); -+ __ sub(sp, sp, x9); ++ __ sd(x18, x18_save); ++ __ sd(x19, x19_save); ++ __ sd(x20, x20_save); ++ __ sd(x21, x21_save); ++ __ sd(x22, x22_save); ++ __ sd(x23, x23_save); ++ __ sd(x24, x24_save); ++ __ sd(x25, x25_save); ++ __ sd(x26, x26_save); ++ __ sd(x27, x27_save); + -+ // Push interpreter frames in a loop -+ __ li(t0, 0xDEADDEAD); // Make a recognizable pattern -+ __ mv(t1, t0); -+ Label loop; -+ __ bind(loop); -+ __ ld(x9, Address(x14, 0)); // Load frame size -+ __ addi(x14, x14, wordSize); -+ __ sub(x9, x9, 2 * wordSize); // We'll push pc and fp by hand -+ __ ld(ra, Address(x12, 0)); // Load pc -+ __ addi(x12, x12, wordSize); -+ __ enter(); // Save old & set new fp -+ __ sub(sp, sp, x9); // Prolog -+ // This value is corrected by layout_activation_impl -+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable -+ __ mv(sender_sp, sp); // Pass sender_sp to next frame -+ __ addi(x13, x13, -1); // Decrement counter -+ __ bnez(x13, loop); ++ __ fsd(f8, f8_save); ++ __ fsd(f9, f9_save); ++ __ fsd(f18, f18_save); ++ __ fsd(f19, f19_save); ++ __ fsd(f20, f20_save); ++ __ fsd(f21, f21_save); ++ __ fsd(f22, f22_save); ++ __ fsd(f23, f23_save); ++ __ fsd(f24, f24_save); ++ __ fsd(f25, f25_save); ++ __ fsd(f26, f26_save); ++ __ fsd(f27, f27_save); + -+ // Re-push self-frame -+ __ ld(ra, Address(x12)); -+ __ enter(); ++ // install Java thread in global register now we have saved ++ // whatever value it held ++ __ mv(xthread, c_rarg7); + -+ // Allocate a full sized register save area. 
We subtract 2 because -+ // enter() just pushed 2 words -+ __ sub(sp, sp, (frame_size_in_words - 2) * wordSize); ++ // And method ++ __ mv(xmethod, c_rarg3); + -+ // Restore frame locals after moving the frame -+ __ fsd(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); -+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ // set up the heapbase register ++ __ reinit_heapbase(); + -+ // Call C code. Need thread but NOT official VM entry -+ // crud. We cannot block on this call, no GC can happen. Call should -+ // restore return values to their stack-slots with the new SP. -+ // -+ // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ beqz(t0, L); ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ BIND(L); ++ } ++#endif ++ // pass parameters if any ++ __ mv(esp, sp); ++ __ slli(t0, c_rarg6, LogBytesPerWord); ++ __ sub(t0, sp, t0); // Move SP out of the way ++ __ andi(sp, t0, -2 * wordSize); + -+ // Use fp because the frames look interpreted now -+ // Don't need the precise return PC here, just precise enough to point into this code blob. -+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ BLOCK_COMMENT("pass parameters if any"); ++ Label parameters_done; ++ // parameter count is still in c_rarg6 ++ // and parameter pointer identifying param 1 is in c_rarg5 ++ __ beqz(c_rarg6, parameters_done); + -+ __ mv(c_rarg0, xthread); -+ __ mv(c_rarg1, xcpool); // second arg: exec_mode -+ offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); -+ __ jalr(x1, t0, offset); ++ address loop = __ pc(); ++ __ ld(t0, c_rarg5, 0); ++ __ addi(c_rarg5, c_rarg5, wordSize); ++ __ addi(c_rarg6, c_rarg6, -1); ++ __ push_reg(t0); ++ __ bgtz(c_rarg6, loop); + -+ // Set an oopmap for the call site -+ // Use the same PC we used for the last java frame -+ oop_maps->add_gc_map(the_pc - start, -+ new OopMap(frame_size_in_words, 0)); ++ __ BIND(parameters_done); + -+ // Clear fp AND pc -+ __ reset_last_Java_frame(true); ++ // call Java entry -- passing methdoOop, and current sp ++ // xmethod: Method* ++ // x30: sender sp ++ BLOCK_COMMENT("call Java function"); ++ __ mv(x30, sp); ++ __ jalr(c_rarg4); + -+ // Collect return values -+ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); -+ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ // save current address for use by exception handling code + -+ // Pop self-frame. -+ __ leave(); // Epilog -+ -+ // Jump to interpreter -+ __ ret(); -+ -+ // Make sure all code is generated -+ masm->flush(); -+ -+ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); -+ assert(_deopt_blob != NULL, "create deoptimization blob fail!"); -+ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); -+} -+ -+// Number of stack slots between incoming argument block and the start of -+// a new frame. The PROLOG must add this many slots to the stack. The -+// EPILOG must remove this many slots. -+// RISCV needs two words for RA (return address) and FP (frame pointer). 
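The parameter set-up above reserves one word per Java argument below sp and then rounds the result down with andi(sp, t0, -2 * wordSize), because the RISC-V calling convention keeps sp 16-byte aligned. A small standalone sketch of that arithmetic (the starting sp and word count are made-up values, not taken from the patch):

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t wordSize = 8;
  uint64_t sp = 0x3ffffff6a0;   // hypothetical current stack pointer
  uint64_t nwords = 5;          // hypothetical parameter count (c_rarg6)

  // slli by LogBytesPerWord, sub from sp, then andi(sp, t0, -2 * wordSize)
  uint64_t t0 = sp - (nwords << 3);            // reserve nwords words
  uint64_t aligned = t0 & ~(2 * wordSize - 1); // & -16 keeps sp 16-byte aligned

  std::printf("reserved sp = 0x%llx, aligned sp = 0x%llx\n",
              (unsigned long long)t0, (unsigned long long)aligned);
  return 0;
}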
-+uint SharedRuntime::in_preserve_stack_slots() { -+ return 2 * VMRegImpl::slots_per_word; -+} ++ return_address = __ pc(); + -+uint SharedRuntime::out_preserve_stack_slots() { -+ return 0; -+} ++ // store result depending on type (everything that is not ++ // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ // n.b. this assumes Java returns an integral result in x10 ++ // and a floating result in j_farg0 ++ __ ld(j_rarg2, result); ++ Label is_long, is_float, is_double, exit; ++ __ ld(j_rarg1, result_type); ++ __ li(t0, (u1)T_OBJECT); ++ __ beq(j_rarg1, t0, is_long); ++ __ li(t0, (u1)T_LONG); ++ __ beq(j_rarg1, t0, is_long); ++ __ li(t0, (u1)T_FLOAT); ++ __ beq(j_rarg1, t0, is_float); ++ __ li(t0, (u1)T_DOUBLE); ++ __ beq(j_rarg1, t0, is_double); + -+#ifdef COMPILER2 -+//------------------------------generate_uncommon_trap_blob-------------------- -+void SharedRuntime::generate_uncommon_trap_blob() { -+ // Allocate space for the code -+ ResourceMark rm; -+ // Setup code generation tools -+ CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++ // handle T_INT case ++ __ sw(x10, Address(j_rarg2)); + -+ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); ++ __ BIND(exit); + -+ address start = __ pc(); ++ // pop parameters ++ __ addi(esp, fp, sp_after_call_off * wordSize); + -+ // Push self-frame. We get here with a return address in RA -+ // and sp should be 16 byte aligned -+ // push fp and retaddr by hand -+ __ addi(sp, sp, -2 * wordSize); -+ __ sd(ra, Address(sp, wordSize)); -+ __ sd(fp, Address(sp, 0)); -+ // we don't expect an arg reg save area -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#ifdef ASSERT ++ // verify that threads correspond ++ { ++ Label L, S; ++ __ ld(t0, thread); ++ __ bne(xthread, t0, S); ++ __ get_thread(t0); ++ __ beq(xthread, t0, L); ++ __ BIND(S); ++ __ stop("StubRoutines::call_stub: threads must correspond"); ++ __ BIND(L); ++ } +#endif -+ // compiler left unloaded_class_index in j_rarg0 move to where the -+ // runtime expects it. -+ __ addiw(c_rarg1, j_rarg0, 0); + -+ // we need to set the past SP to the stack pointer of the stub frame -+ // and the pc to the address where this runtime call will return -+ // although actually any pc in this code blob will do). -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ // restore callee-save registers ++ __ fld(f27, f27_save); ++ __ fld(f26, f26_save); ++ __ fld(f25, f25_save); ++ __ fld(f24, f24_save); ++ __ fld(f23, f23_save); ++ __ fld(f22, f22_save); ++ __ fld(f21, f21_save); ++ __ fld(f20, f20_save); ++ __ fld(f19, f19_save); ++ __ fld(f18, f18_save); ++ __ fld(f9, f9_save); ++ __ fld(f8, f8_save); + -+ // Call C code. Need thread but NOT official VM entry -+ // crud. We cannot block on this call, no GC can happen. Call should -+ // capture callee-saved registers as well as return values. -+ // -+ // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode) -+ // -+ // n.b. 
3 gp args, 0 fp args, integral return type ++ __ ld(x27, x27_save); ++ __ ld(x26, x26_save); ++ __ ld(x25, x25_save); ++ __ ld(x24, x24_save); ++ __ ld(x23, x23_save); ++ __ ld(x22, x22_save); ++ __ ld(x21, x21_save); ++ __ ld(x20, x20_save); ++ __ ld(x19, x19_save); ++ __ ld(x18, x18_save); + -+ __ mv(c_rarg0, xthread); -+ __ mvw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap); -+ int32_t offset = 0; -+ __ la_patchable(t0, -+ RuntimeAddress(CAST_FROM_FN_PTR(address, -+ Deoptimization::uncommon_trap)), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); ++ __ ld(x9, x9_save); + -+ // Set an oopmap for the call site -+ OopMapSet* oop_maps = new OopMapSet(); -+ OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); -+ assert_cond(oop_maps != NULL && map != NULL); ++ __ ld(c_rarg0, call_wrapper); ++ __ ld(c_rarg1, result); ++ __ ld(c_rarg2, result_type); ++ __ ld(c_rarg3, method); ++ __ ld(c_rarg4, entry_point); ++ __ ld(c_rarg5, parameters); ++ __ ld(c_rarg6, parameter_size); ++ __ ld(c_rarg7, thread); + -+ // location of fp is known implicitly by the frame sender code ++ // leave frame and return to caller ++ __ leave(); ++ __ ret(); + -+ oop_maps->add_gc_map(__ pc() - start, map); ++ // handle return types different from T_INT + -+ __ reset_last_Java_frame(false); ++ __ BIND(is_long); ++ __ sd(x10, Address(j_rarg2, 0)); ++ __ j(exit); + -+ // move UnrollBlock* into x14 -+ __ mv(x14, x10); ++ __ BIND(is_float); ++ __ fsw(j_farg0, Address(j_rarg2, 0), t0); ++ __ j(exit); + -+#ifdef ASSERT -+ { Label L; -+ __ lwu(t0, Address(x14, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); -+ __ mvw(t1, Deoptimization::Unpack_uncommon_trap); -+ __ beq(t0, t1, L); -+ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); -+ __ bind(L); ++ __ BIND(is_double); ++ __ fsd(j_farg0, Address(j_rarg2, 0), t0); ++ __ j(exit); ++ ++ return start; + } -+#endif + -+ // Pop all the frames we must move/replace. ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. + // -+ // Frame picture (youngest to oldest) -+ // 1: self-frame (no frame link) -+ // 2: deopting frame (no frame link) -+ // 3: caller of deopting frame (could be compiled/interpreted). ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // x10: exception oop + -+ __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog! ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); + -+ // Pop deoptimized frame (int) -+ __ lwu(x12, Address(x14, -+ Deoptimization::UnrollBlock:: -+ size_of_deoptimized_frame_offset_in_bytes())); -+ __ sub(x12, x12, 2 * wordSize); -+ __ add(sp, sp, x12); -+ __ ld(fp, sp, 0); -+ __ ld(ra, sp, wordSize); -+ __ addi(sp, sp, 2 * wordSize); -+ // RA should now be the return address to the caller (3) frame ++ // same as in generate_call_stub(): ++ const Address thread(fp, thread_off * wordSize); + +#ifdef ASSERT -+ // Compilers generate code that bang the stack by as much as the -+ // interpreter would need. So this stack banging should never -+ // trigger a fault. Verify that it does not on non product builds. 
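The epilogue of the call stub above stores the Java result according to result_type: T_OBJECT and T_LONG are written back as full 64-bit values, T_FLOAT and T_DOUBLE come from the FP result register, and every other type is narrowed to a 32-bit int. A rough standalone illustration of that dispatch (FakeBasicType and its values are placeholders, not HotSpot's BasicType encoding):

#include <cstdint>
#include <cstring>
#include <cstdio>

enum FakeBasicType { FT_INT, FT_LONG, FT_OBJECT, FT_FLOAT, FT_DOUBLE }; // placeholders

// Store the raw return values into the caller-supplied result slot,
// mirroring the is_long / is_float / is_double branches above.
void store_result(void* result, FakeBasicType type, uint64_t int_ret, double fp_ret) {
  switch (type) {
    case FT_OBJECT:
    case FT_LONG: {                       // full 64-bit store (sd x10)
      std::memcpy(result, &int_ret, 8);
      break;
    }
    case FT_FLOAT: {                      // fsw j_farg0
      float f = (float)fp_ret;
      std::memcpy(result, &f, 4);
      break;
    }
    case FT_DOUBLE: {                     // fsd j_farg0
      std::memcpy(result, &fp_ret, 8);
      break;
    }
    default: {                            // everything else treated as T_INT (sw x10)
      uint32_t i = (uint32_t)int_ret;
      std::memcpy(result, &i, 4);
      break;
    }
  }
}

int main() {
  uint64_t slot = 0;
  store_result(&slot, FT_LONG, 0x1122334455667788ull, 0.0);
  std::printf("long result slot: 0x%llx\n", (unsigned long long)slot);
  return 0;
}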
-+ __ lwu(x11, Address(x14, -+ Deoptimization::UnrollBlock:: -+ total_frame_sizes_offset_in_bytes())); -+ __ bang_stack_size(x11, x12); ++ // verify that threads correspond ++ { ++ Label L, S; ++ __ ld(t0, thread); ++ __ bne(xthread, t0, S); ++ __ get_thread(t0); ++ __ beq(xthread, t0, L); ++ __ bind(S); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } +#endif + -+ // Load address of array of frame pcs into x12 (address*) -+ __ ld(x12, Address(x14, -+ Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); -+ -+ // Load address of array of frame sizes into x15 (intptr_t*) -+ __ ld(x15, Address(x14, -+ Deoptimization::UnrollBlock:: -+ frame_sizes_offset_in_bytes())); -+ -+ // Counter -+ __ lwu(x13, Address(x14, -+ Deoptimization::UnrollBlock:: -+ number_of_frames_offset_in_bytes())); // (int) -+ -+ // Now adjust the caller's stack to make up for the extra locals but -+ // record the original sp so that we can save it in the skeletal -+ // interpreter frame and the stack walking of interpreter_sender -+ // will get the unextended sp value and not the "real" sp value. -+ -+ const Register sender_sp = t1; // temporary register ++ // set pending exception ++ __ verify_oop(x10); + -+ __ lwu(x11, Address(x14, -+ Deoptimization::UnrollBlock:: -+ caller_adjustment_offset_in_bytes())); // (int) -+ __ mv(sender_sp, sp); -+ __ sub(sp, sp, x11); ++ __ sd(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ mv(t0, (address)__FILE__); ++ __ sd(t0, Address(xthread, Thread::exception_file_offset())); ++ __ mv(t0, (int)__LINE__); ++ __ sw(t0, Address(xthread, Thread::exception_line_offset())); + -+ // Push interpreter frames in a loop -+ Label loop; -+ __ bind(loop); -+ __ ld(x11, Address(x15, 0)); // Load frame size -+ __ sub(x11, x11, 2 * wordSize); // We'll push pc and fp by hand -+ __ ld(ra, Address(x12, 0)); // Save return address -+ __ enter(); // and old fp & set new fp -+ __ sub(sp, sp, x11); // Prolog -+ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable -+ // This value is corrected by layout_activation_impl -+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ __ mv(sender_sp, sp); // Pass sender_sp to next frame -+ __ add(x15, x15, wordSize); // Bump array pointer (sizes) -+ __ add(x12, x12, wordSize); // Bump array pointer (pcs) -+ __ subw(x13, x13, 1); // Decrement counter -+ __ bgtz(x13, loop); -+ __ ld(ra, Address(x12, 0)); // save final return address -+ // Re-push self-frame -+ __ enter(); // & old fp & set new fp ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, ++ "_call_stub_return_address must have been generated before"); ++ __ j(StubRoutines::_call_stub_return_address); + -+ // Use fp because the frames look interpreted now -+ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. -+ // Don't need the precise return PC here, just precise enough to point into this code blob. -+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ return start; ++ } + -+ // Call C code. Need thread but NOT official VM entry -+ // crud. We cannot block on this call, no GC can happen. Call should -+ // restore return values to their stack-slots with the new SP. ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. 
The pending exception in Thread is ++ // converted into a Java-level exception. + // -+ // BasicType unpack_frames(JavaThread* thread, int exec_mode) ++ // Contract with Java-level exception handlers: ++ // x10: exception ++ // x13: throwing pc + // ++ // NOTE: At entry of this stub, exception-pc must be in RA !! + -+ // n.b. 2 gp args, 0 fp args, integral return type -+ -+ // sp should already be aligned -+ __ mv(c_rarg0, xthread); -+ __ mvw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap); -+ offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); -+ __ jalr(x1, t0, offset); ++ // NOTE: this is always used as a jump target within generated code ++ // so it just needs to be generated code with no x86 prolog + -+ // Set an oopmap for the call site -+ // Use the same PC we used for the last java frame -+ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ address start = __ pc(); + -+ // Clear fp AND pc -+ __ reset_last_Java_frame(true); ++ // Upon entry, RA points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // becomes the throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. + -+ // Pop self-frame. -+ __ leave(); // Epilog ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ bnez(t0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif + -+ // Jump to interpreter -+ __ ret(); ++ // compute exception handler into x9 + -+ // Make sure all code is generated -+ masm->flush(); ++ // call the VM to find the handler address associated with the ++ // caller address. pass thread in x10 and caller pc (ret address) ++ // in x11. n.b. the caller pc is in ra, unlike x86 where it is on ++ // the stack. ++ __ mv(c_rarg1, ra); ++ // ra will be trashed by the VM call so we move it to x9 ++ // (callee-saved) because we also need to pass it to the handler ++ // returned by this call. ++ __ mv(x9, ra); ++ BLOCK_COMMENT("call exception_handler_for_return_address"); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ++ SharedRuntime::exception_handler_for_return_address), ++ xthread, c_rarg1); ++ // we should not really care that ra is no longer the callee ++ // address. we saved the value the handler needs in x9 so we can ++ // just copy it to x13. however, the C2 handler will push its own ++ // frame and then calls into the VM and the VM code asserts that ++ // the PC for the frame above the handler belongs to a compiled ++ // Java method. So, we restore ra here to satisfy that assert. 
++ __ mv(ra, x9); ++ // setup x10 & x13 & clear pending exception ++ __ mv(x13, x9); ++ __ mv(x9, x10); ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); + -+ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, -+ SimpleRuntimeFrame::framesize >> 1); -+} -+#endif // COMPILER2 ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bnez(x10, L); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif + -+//------------------------------generate_handler_blob------ -+// -+// Generate a special Compile2Runtime blob that saves all registers, -+// and setup oopmap. -+// -+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { -+ ResourceMark rm; -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); -+ OopMap* map = NULL; ++ // continue at exception handler ++ // x10: exception ++ // x13: throwing pc ++ // x9: exception handler ++ __ verify_oop(x10); ++ __ jr(x9); + -+ // Allocate space for the code. Setup code generation tools. -+ CodeBuffer buffer("handler_blob", 2048, 1024); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++ return start; ++ } + -+ address start = __ pc(); -+ address call_pc = NULL; -+ int frame_size_in_words = -1; -+ bool cause_return = (poll_type == POLL_AT_RETURN); -+ RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++ // Non-destructive plausibility checks for oops ++ // ++ // Arguments: ++ // x10: oop to verify ++ // t0: error message ++ // ++ // Stack after saving c_rarg3: ++ // [tos + 0]: saved c_rarg3 ++ // [tos + 1]: saved c_rarg2 ++ // [tos + 2]: saved ra ++ // [tos + 3]: saved t1 ++ // [tos + 4]: saved x10 ++ // [tos + 5]: saved t0 ++ address generate_verify_oop() { + -+ // Save Integer and Float registers. -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); + -+ // The following is basically a call_VM. However, we need the precise -+ // address of the call in order to generate an oopmap. Hence, we do all the -+ // work outselves. ++ Label exit, error; + -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ __ push_reg(0x3000, sp); // save c_rarg2 and c_rarg3 + -+ // The return address must always be correct so that frame constructor never -+ // sees an invalid pc. ++ __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr())); ++ __ ld(c_rarg3, Address(c_rarg2)); ++ __ add(c_rarg3, c_rarg3, 1); ++ __ sd(c_rarg3, Address(c_rarg2)); + -+ if (!cause_return) { -+ // overwrite the return address pushed by save_live_registers -+ // Additionally, x18 is a callee-saved register so we can look at -+ // it later to determine if someone changed the return address for -+ // us! 
-+ __ ld(x18, Address(xthread, JavaThread::saved_exception_pc_offset())); -+ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); -+ } ++ // object is in x10 ++ // make sure object is 'reasonable' ++ __ beqz(x10, exit); // if obj is NULL it is OK + -+ // Do the call -+ __ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(call_ptr), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); ++ // Check if the oop is in the right area of memory ++ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); ++ __ andr(c_rarg2, x10, c_rarg3); ++ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_bits()); + -+ // Set an oopmap for the call site. This oopmap will map all -+ // oop-registers and debug-info registers as callee-saved. This -+ // will allow deoptimization at this safepoint to find all possible -+ // debug-info recordings, as well as let GC find all oops. ++ // Compare c_rarg2 and c_rarg3. ++ __ bne(c_rarg2, c_rarg3, error); + -+ oop_maps->add_gc_map( __ pc() - start, map); ++ // make sure klass is 'reasonable', which is not zero. ++ __ load_klass(x10, x10); // get klass ++ __ beqz(x10, error); // if klass is NULL it is broken + -+ Label noException; ++ // return if everything seems ok ++ __ bind(exit); + -+ __ reset_last_Java_frame(false); ++ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 ++ __ ret(); + -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ // handle errors ++ __ bind(error); ++ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 + -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, noException); ++ __ pusha(); ++ // debug(char* msg, int64_t pc, int64_t regs[]) ++ __ mv(c_rarg0, t0); // pass address of error message ++ __ mv(c_rarg1, ra); // pass return address ++ __ mv(c_rarg2, sp); // pass address of regs on stack ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ BLOCK_COMMENT("call MacroAssembler::debug"); ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset); ++ __ jalr(x1, t0, offset); ++ __ ebreak(); + -+ // Exception pending ++ return start; ++ } + -+ reg_saver.restore_live_registers(masm); ++ // The inner part of zero_words(). ++ // ++ // Inputs: ++ // x28: the HeapWord-aligned base address of an array to zero. ++ // x29: the count in HeapWords, x29 > 0. ++ // ++ // Returns x28 and x29, adjusted for the caller to clear. ++ // x28: the base address of the tail of words left to clear. ++ // x29: the number of words in the tail. ++ // x29 < MacroAssembler::zero_words_block_size. + -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ address generate_zero_blocks() { ++ Label done; + -+ // No exception case -+ __ bind(noException); ++ const Register base = x28, cnt = x29; + -+ Label no_adjust, bail; -+ if (!cause_return) { -+ // If our stashed return pc was modified by the runtime we avoid touching it -+ __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); -+ __ bne(x18, t0, no_adjust); ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "zero_blocks"); ++ address start = __ pc(); + -+#ifdef ASSERT -+ // Verify the correct encoding of the poll we're about to skip. 
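verify_oop above is only a plausibility filter: a NULL oop is accepted, otherwise the oop masked with Universe::verify_oop_mask() must equal Universe::verify_oop_bits() and the klass loaded from it must be non-NULL. A standalone sketch of the same check (the mask and bits values below are invented for illustration and do not come from the VM):

#include <cstdint>
#include <cstdio>

// Hypothetical stand-ins for Universe::verify_oop_mask() / verify_oop_bits().
const uintptr_t kVerifyOopMask = 0xfffful << 48;  // made-up mask
const uintptr_t kVerifyOopBits = 0;               // made-up expected bits

bool oop_is_plausible(uintptr_t oop, uintptr_t klass) {
  if (oop == 0) return true;                                   // NULL oop is OK
  if ((oop & kVerifyOopMask) != kVerifyOopBits) return false;  // wrong address range
  if (klass == 0) return false;                                // broken: no klass
  return true;
}

int main() {
  std::printf("%d %d\n",
              oop_is_plausible(0, 0),
              oop_is_plausible(0x7f0000001000ul, 0x1000));
  return 0;
}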
-+ // See NativeInstruction::is_lwu_to_zr() -+ __ lwu(t0, Address(x18)); -+ __ andi(t1, t0, 0b0000011); -+ __ mv(t2, 0b0000011); -+ __ bne(t1, t2, bail); // 0-6:0b0000011 -+ __ srli(t1, t0, 7); -+ __ andi(t1, t1, 0b00000); -+ __ bnez(t1, bail); // 7-11:0b00000 -+ __ srli(t1, t0, 12); -+ __ andi(t1, t1, 0b110); -+ __ mv(t2, 0b110); -+ __ bne(t1, t2, bail); // 12-14:0b110 -+#endif -+ // Adjust return pc forward to step over the safepoint poll instruction -+ __ add(x18, x18, NativeInstruction::instruction_size); -+ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); -+ } ++ { ++ // Clear the remaining blocks. ++ Label loop; ++ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); ++ __ bltz(cnt, done); ++ __ bind(loop); ++ for (int i = 0; i < MacroAssembler::zero_words_block_size; i++) { ++ __ sd(zr, Address(base, 0)); ++ __ add(base, base, 8); ++ } ++ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); ++ __ bgez(cnt, loop); ++ __ bind(done); ++ __ add(cnt, cnt, MacroAssembler::zero_words_block_size); ++ } + -+ __ bind(no_adjust); -+ // Normal exit, restore registers and exit. ++ __ ret(); + -+ reg_saver.restore_live_registers(masm); -+ __ ret(); ++ return start; ++ } + -+#ifdef ASSERT -+ __ bind(bail); -+ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); -+#endif ++ typedef enum { ++ copy_forwards = 1, ++ copy_backwards = -1 ++ } copy_direction; + -+ // Make sure all code is generated -+ masm->flush(); ++ // Bulk copy of blocks of 8 words. ++ // ++ // count is a count of words. ++ // ++ // Precondition: count >= 8 ++ // ++ // Postconditions: ++ // ++ // The least significant bit of count contains the remaining count ++ // of words to copy. The rest of count is trash. ++ // ++ // s and d are adjusted to point to the remaining words to copy ++ // ++ void generate_copy_longs(Label &start, Register s, Register d, Register count, ++ copy_direction direction) { ++ int unit = wordSize * direction; ++ int bias = wordSize; + -+ // Fill-out other meta info -+ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); -+} ++ const Register tmp_reg0 = x13, tmp_reg1 = x14, tmp_reg2 = x15, tmp_reg3 = x16, ++ tmp_reg4 = x17, tmp_reg5 = x7, tmp_reg6 = x28, tmp_reg7 = x29; + -+// -+// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss -+// -+// Generate a stub that calls into vm to find out the proper destination -+// of a java call. All the argument registers are live at this point -+// but since this is generic code we don't know what they are and the caller -+// must do any gc of the args. 
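zero_blocks above clears whole blocks of MacroAssembler::zero_words_block_size words and hands anything smaller back to the caller in x28/x29. A standalone sketch of that contract (a block size of 8 words is assumed here purely for illustration):

#include <cstdint>
#include <cstdio>

// Returns the new base; *count is left < block_size for the caller to finish.
uint64_t* zero_blocks(uint64_t* base, ptrdiff_t* count, ptrdiff_t block_size = 8) {
  while (*count >= block_size) {           // mirrors the sub/bgez loop above
    for (ptrdiff_t i = 0; i < block_size; i++) {
      *base++ = 0;                         // sd zr, Address(base, 0); add base, base, 8
    }
    *count -= block_size;
  }
  return base;                             // tail of < block_size words remains
}

int main() {
  uint64_t buf[21];
  for (auto& w : buf) w = 0xdeadbeef;
  ptrdiff_t cnt = 21;
  uint64_t* tail = zero_blocks(buf, &cnt);
  std::printf("words left for caller: %td (tail starts at index %td)\n", cnt, tail - buf);
  return 0;
}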
-+// -+RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { -+ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ const Register stride = x30; + -+ // allocate space for the code -+ ResourceMark rm; ++ assert_different_registers(t0, tmp_reg0, tmp_reg1, tmp_reg2, tmp_reg3, ++ tmp_reg4, tmp_reg5, tmp_reg6, tmp_reg7); ++ assert_different_registers(s, d, count, t0); + -+ CodeBuffer buffer(name, 1000, 512); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++ Label again, drain; ++ const char* stub_name = NULL; ++ if (direction == copy_forwards) { ++ stub_name = "forward_copy_longs"; ++ } else { ++ stub_name = "backward_copy_longs"; ++ } ++ StubCodeMark mark(this, "StubRoutines", stub_name); ++ __ align(CodeEntryAlignment); ++ __ bind(start); + -+ int frame_size_in_words = -1; -+ RegisterSaver reg_saver(false /* save_vectors */); ++ if (direction == copy_forwards) { ++ __ sub(s, s, bias); ++ __ sub(d, d, bias); ++ } + -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); -+ OopMap* map = NULL; ++#ifdef ASSERT ++ // Make sure we are never given < 8 words ++ { ++ Label L; + -+ int start = __ offset(); ++ __ li(t0, 8); ++ __ bge(count, t0, L); ++ __ stop("genrate_copy_longs called with < 8 words"); ++ __ bind(L); ++ } ++#endif + -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ ld(tmp_reg4, Address(s, 5 * unit)); ++ __ ld(tmp_reg5, Address(s, 6 * unit)); ++ __ ld(tmp_reg6, Address(s, 7 * unit)); ++ __ ld(tmp_reg7, Address(s, 8 * unit)); ++ __ addi(s, s, 8 * unit); + -+ int frame_complete = __ offset(); ++ __ sub(count, count, 16); ++ __ bltz(count, drain); + -+ { -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ __ bind(again); + -+ __ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(destination), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); -+ } ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ sd(tmp_reg4, Address(d, 5 * unit)); ++ __ sd(tmp_reg5, Address(d, 6 * unit)); ++ __ sd(tmp_reg6, Address(d, 7 * unit)); ++ __ sd(tmp_reg7, Address(d, 8 * unit)); + -+ // Set an oopmap for the call site. -+ // We need this not only for callee-saved registers, but also for volatile -+ // registers that the compiler might be keeping live across a safepoint. 
++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ ld(tmp_reg4, Address(s, 5 * unit)); ++ __ ld(tmp_reg5, Address(s, 6 * unit)); ++ __ ld(tmp_reg6, Address(s, 7 * unit)); ++ __ ld(tmp_reg7, Address(s, 8 * unit)); + -+ oop_maps->add_gc_map( __ offset() - start, map); ++ __ addi(s, s, 8 * unit); ++ __ addi(d, d, 8 * unit); + -+ // x10 contains the address we are going to jump to assuming no exception got installed ++ __ sub(count, count, 8); ++ __ bgez(count, again); + -+ // clear last_Java_sp -+ __ reset_last_Java_frame(false); -+ // check for pending exceptions -+ Label pending; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ bnez(t0, pending); ++ // Drain ++ __ bind(drain); + -+ // get the returned Method* -+ __ get_vm_result_2(xmethod, xthread); -+ __ sd(xmethod, Address(sp, reg_saver.reg_offset_in_bytes(xmethod))); ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ sd(tmp_reg4, Address(d, 5 * unit)); ++ __ sd(tmp_reg5, Address(d, 6 * unit)); ++ __ sd(tmp_reg6, Address(d, 7 * unit)); ++ __ sd(tmp_reg7, Address(d, 8 * unit)); ++ __ addi(d, d, 8 * unit); + -+ // x10 is where we want to jump, overwrite t0 which is saved and temporary -+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(t0))); -+ reg_saver.restore_live_registers(masm); ++ { ++ Label L1, L2; ++ __ andi(t0, count, 4); ++ __ beqz(t0, L1); + -+ // We are back the the original state on entry and ready to go. ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ addi(s, s, 4 * unit); + -+ __ jr(t0); ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ addi(d, d, 4 * unit); + -+ // Pending exception after the safepoint ++ __ bind(L1); + -+ __ bind(pending); ++ if (direction == copy_forwards) { ++ __ addi(s, s, bias); ++ __ addi(d, d, bias); ++ } + -+ reg_saver.restore_live_registers(masm); ++ __ andi(t0, count, 2); ++ __ beqz(t0, L2); ++ if (direction == copy_backwards) { ++ __ addi(s, s, 2 * unit); ++ __ ld(tmp_reg0, Address(s)); ++ __ ld(tmp_reg1, Address(s, wordSize)); ++ __ addi(d, d, 2 * unit); ++ __ sd(tmp_reg0, Address(d)); ++ __ sd(tmp_reg1, Address(d, wordSize)); ++ } else { ++ __ ld(tmp_reg0, Address(s)); ++ __ ld(tmp_reg1, Address(s, wordSize)); ++ __ addi(s, s, 2 * unit); ++ __ sd(tmp_reg0, Address(d)); ++ __ sd(tmp_reg1, Address(d, wordSize)); ++ __ addi(d, d, 2 * unit); ++ } ++ __ bind(L2); ++ } + -+ // exception pending => remove activation and forward to exception handler ++ __ ret(); ++ } + -+ __ sd(zr, Address(xthread, JavaThread::vm_result_offset())); ++ Label copy_f, copy_b; + -+ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ // All-singing all-dancing memory copy. ++ // ++ // Copy count units of memory from s to d. The size of a unit is ++ // step, which can be positive or negative depending on the direction ++ // of copy. If is_aligned is false, we align the source address. 
++ // ++ /* ++ * if (is_aligned) { ++ * goto copy_8_bytes; ++ * } ++ * bool is_backwards = step < 0; ++ * int granularity = uabs(step); ++ * count = count * granularity; * count bytes ++ * ++ * if (is_backwards) { ++ * s += count; ++ * d += count; ++ * } ++ * ++ * count limit maybe greater than 16, for better performance ++ * if (count < 16) { ++ * goto copy_small; ++ * } ++ * ++ * if ((dst % 8) == (src % 8)) { ++ * aligned; ++ * goto copy8; ++ * } ++ * ++ * copy_small: ++ * load element one by one; ++ * done; ++ */ + -+ // ------------- -+ // make sure all code is generated -+ masm->flush(); ++ typedef void (MacroAssembler::*copy_insn)(Register Rd, const Address &adr, Register temp); + -+ // return the blob -+ return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); -+} ++ void copy_memory_v(Register s, Register d, Register count, Register tmp, int step) { ++ bool is_backward = step < 0; ++ int granularity = uabs(step); + -+#ifdef COMPILER2 -+RuntimeStub* SharedRuntime::make_native_invoker(address call_target, -+ int shadow_space_bytes, -+ const GrowableArray& input_registers, -+ const GrowableArray& output_registers) { -+ Unimplemented(); -+ return nullptr; -+} ++ const Register src = x30, dst = x31, vl = x14, cnt = x15, tmp1 = x16, tmp2 = x17; ++ assert_different_registers(s, d, cnt, vl, tmp, tmp1, tmp2); ++ Assembler::SEW sew = Assembler::elembytes_to_sew(granularity); ++ Label loop_forward, loop_backward, done; + -+//------------------------------generate_exception_blob--------------------------- -+// creates exception blob at the end -+// Using exception blob, this code is jumped from a compiled method. -+// (see emit_exception_handler in riscv.ad file) -+// -+// Given an exception pc at a call we call into the runtime for the -+// handler in this method. This handler might merely restore state -+// (i.e. callee save registers) unwind the frame and jump to the -+// exception handler for the nmethod if there is no Java level handler -+// for the nmethod. -+// -+// This code is entered with a jmp. -+// -+// Arguments: -+// x10: exception oop -+// x13: exception pc -+// -+// Results: -+// x10: exception oop -+// x13: exception pc in caller -+// destination: exception handler of caller -+// -+// Note: the exception pc MUST be at a call (precise debug information) -+// Registers x10, x13, x12, x14, x15, t0 are not callee saved. 
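The pseudocode comment above summarises the scalar copy_memory strategy: copies shorter than 16 bytes, or with source and destination not congruent modulo 8, go element by element, otherwise the loop first steps to an 8-byte boundary and then moves whole 64-bit words. A hedged standalone sketch of that decision logic for the forward, byte-granularity case (an illustration of the idea, not the generated stub):

#include <cstdint>
#include <cstring>
#include <cstdio>

// Forward byte copy using the same "align, then copy words" idea as copy_memory.
void copy_forward(const uint8_t* s, uint8_t* d, size_t count) {
  if (count >= 16 && ((uintptr_t)s % 8) == ((uintptr_t)d % 8)) {
    while ((uintptr_t)s % 8 != 0) {        // same_aligned: step to an 8-byte boundary
      *d++ = *s++;
      count--;
    }
    while (count >= 8) {                   // copy8: move whole 64-bit words
      uint64_t w;
      std::memcpy(&w, s, 8);
      std::memcpy(d, &w, 8);
      s += 8; d += 8; count -= 8;
    }
  }
  while (count > 0) {                      // copy_small: remaining elements one by one
    *d++ = *s++;
    count--;
  }
}

int main() {
  char src[] = "copy_memory scalar path illustration";
  char dst[sizeof(src)] = {0};
  copy_forward((const uint8_t*)src, (uint8_t*)dst, sizeof(src));
  std::printf("%s\n", dst);
  return 0;
}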
-+// ++ __ mv(dst, d); ++ __ mv(src, s); ++ __ mv(cnt, count); + -+void OptoRuntime::generate_exception_blob() { -+ assert(!OptoRuntime::is_callee_saved_register(R13_num), ""); -+ assert(!OptoRuntime::is_callee_saved_register(R10_num), ""); -+ assert(!OptoRuntime::is_callee_saved_register(R12_num), ""); ++ __ bind(loop_forward); ++ __ vsetvli(vl, cnt, sew, Assembler::m8); ++ if (is_backward) { ++ __ bne(vl, cnt, loop_backward); ++ } + -+ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); ++ __ vlex_v(v0, src, sew); ++ __ sub(cnt, cnt, vl); ++ __ slli(vl, vl, (int)sew); ++ __ add(src, src, vl); + -+ // Allocate space for the code -+ ResourceMark rm; -+ // Setup code generation tools -+ CodeBuffer buffer("exception_blob", 2048, 1024); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++ __ vsex_v(v0, dst, sew); ++ __ add(dst, dst, vl); ++ __ bnez(cnt, loop_forward); + -+ // TODO check various assumptions made here -+ // -+ // make sure we do so before running this ++ if (is_backward) { ++ __ j(done); + -+ address start = __ pc(); ++ __ bind(loop_backward); ++ __ sub(tmp, cnt, vl); ++ __ slli(tmp, tmp, sew); ++ __ add(tmp1, s, tmp); ++ __ vlex_v(v0, tmp1, sew); ++ __ add(tmp2, d, tmp); ++ __ vsex_v(v0, tmp2, sew); ++ __ sub(cnt, cnt, vl); ++ __ bnez(cnt, loop_forward); ++ __ bind(done); ++ } ++ } + -+ // push fp and retaddr by hand -+ // Exception pc is 'return address' for stack walker -+ __ addi(sp, sp, -2 * wordSize); -+ __ sd(ra, Address(sp, wordSize)); -+ __ sd(fp, Address(sp)); -+ // there are no callee save registers and we don't expect an -+ // arg reg save area -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif -+ // Store exception in Thread object. We cannot pass any arguments to the -+ // handle_exception call, since we do not want to make any assumption -+ // about the size of the frame where the exception happened in. -+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ void copy_memory(bool is_aligned, Register s, Register d, ++ Register count, Register tmp, int step) { ++ if (UseRVV) { ++ return copy_memory_v(s, d, count, tmp, step); ++ } + -+ // This call does all the hard work. It checks if an exception handler -+ // exists in the method. -+ // If so, it returns the handler address. -+ // If not, it prepares for stack-unwinding, restoring the callee-save -+ // registers of the frame being removed. -+ // -+ // address OptoRuntime::handle_exception_C(JavaThread* thread) -+ // -+ // n.b. 1 gp arg, 0 fp args, integral return type ++ bool is_backwards = step < 0; ++ int granularity = uabs(step); + -+ // the stack should always be aligned -+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, noreg, the_pc, t0); -+ __ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); -+ __ jalr(x1, t0, offset); ++ const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17; + ++ Label same_aligned; ++ Label copy8, copy_small, done; + -+ // handle_exception_C is a special VM call which does not require an explicit -+ // instruction sync afterwards. 
++ copy_insn ld_arr = NULL, st_arr = NULL; ++ switch (granularity) { ++ case 1 : ++ ld_arr = (copy_insn)&MacroAssembler::lbu; ++ st_arr = (copy_insn)&MacroAssembler::sb; ++ break; ++ case 2 : ++ ld_arr = (copy_insn)&MacroAssembler::lhu; ++ st_arr = (copy_insn)&MacroAssembler::sh; ++ break; ++ case 4 : ++ ld_arr = (copy_insn)&MacroAssembler::lwu; ++ st_arr = (copy_insn)&MacroAssembler::sw; ++ break; ++ case 8 : ++ ld_arr = (copy_insn)&MacroAssembler::ld; ++ st_arr = (copy_insn)&MacroAssembler::sd; ++ break; ++ default : ++ ShouldNotReachHere(); ++ } + -+ // Set an oopmap for the call site. This oopmap will only be used if we -+ // are unwinding the stack. Hence, all locations will be dead. -+ // Callee-saved registers will be the same as the frame above (i.e., -+ // handle_exception_stub), since they were restored when we got the -+ // exception. ++ __ beqz(count, done); ++ __ slli(cnt, count, exact_log2(granularity)); ++ if (is_backwards) { ++ __ add(src, s, cnt); ++ __ add(dst, d, cnt); ++ } else { ++ __ mv(src, s); ++ __ mv(dst, d); ++ } + -+ OopMapSet* oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); ++ if (is_aligned) { ++ __ addi(tmp, cnt, -8); ++ __ bgez(tmp, copy8); ++ __ j(copy_small); ++ } + -+ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); ++ __ mv(tmp, 16); ++ __ blt(cnt, tmp, copy_small); + -+ __ reset_last_Java_frame(false); ++ __ xorr(tmp, src, dst); ++ __ andi(tmp, tmp, 0b111); ++ __ bnez(tmp, copy_small); + -+ // Restore callee-saved registers ++ __ bind(same_aligned); ++ __ andi(tmp, src, 0b111); ++ __ beqz(tmp, copy8); ++ if (is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ (_masm->*ld_arr)(tmp3, Address(src), t0); ++ (_masm->*st_arr)(tmp3, Address(dst), t0); ++ if (!is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ __ addi(cnt, cnt, -granularity); ++ __ beqz(cnt, done); ++ __ j(same_aligned); + -+ // fp is an implicitly saved callee saved register (i.e. the calling -+ // convention will save restore it in prolog/epilog) Other than that -+ // there are no callee save registers now that adapter frames are gone. -+ // and we dont' expect an arg reg save area -+ __ ld(fp, Address(sp)); -+ __ ld(x13, Address(sp, wordSize)); -+ __ addi(sp, sp , 2 * wordSize); ++ __ bind(copy8); ++ if (is_backwards) { ++ __ addi(src, src, -wordSize); ++ __ addi(dst, dst, -wordSize); ++ } ++ __ ld(tmp3, Address(src)); ++ __ sd(tmp3, Address(dst)); ++ if (!is_backwards) { ++ __ addi(src, src, wordSize); ++ __ addi(dst, dst, wordSize); ++ } ++ __ addi(cnt, cnt, -wordSize); ++ __ addi(tmp4, cnt, -8); ++ __ bgez(tmp4, copy8); // cnt >= 8, do next loop + -+ // x10: exception handler ++ __ beqz(cnt, done); + -+ // We have a handler in x10 (could be deopt blob). 
-+ __ mv(t0, x10); ++ __ bind(copy_small); ++ if (is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ (_masm->*ld_arr)(tmp3, Address(src), t0); ++ (_masm->*st_arr)(tmp3, Address(dst), t0); ++ if (!is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ __ addi(cnt, cnt, -granularity); ++ __ bgtz(cnt, copy_small); + -+ // Get the exception oop -+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ // Get the exception pc in case we are deoptimized -+ __ ld(x14, Address(xthread, JavaThread::exception_pc_offset())); -+#ifdef ASSERT -+ __ sd(zr, Address(xthread, JavaThread::exception_handler_pc_offset())); -+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); -+#endif -+ // Clear the exception oop so GC no longer processes it as a root. -+ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); ++ __ bind(done); ++ } + -+ // x10: exception oop -+ // t0: exception handler -+ // x14: exception pc -+ // Jump to handler -+ -+ __ jr(t0); -+ -+ // Make sure all code is generated -+ masm->flush(); -+ -+ // Set exception blob -+ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); -+} -+#endif // COMPILER2 -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -new file mode 100644 -index 00000000000..b3fdd04db1b ---- /dev/null -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -0,0 +1,3864 @@ -+/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "compiler/oopMap.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "interpreter/interpreter.hpp" -+#include "memory/universe.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/instanceOop.hpp" -+#include "oops/method.hpp" -+#include "oops/objArrayKlass.hpp" -+#include "oops/oop.inline.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/handles.inline.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubCodeGenerator.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.inline.hpp" -+#include "utilities/align.hpp" -+#include "utilities/powerOfTwo.hpp" -+#ifdef COMPILER2 -+#include "opto/runtime.hpp" -+#endif -+#if INCLUDE_ZGC -+#include "gc/z/zThreadLocalData.hpp" -+#endif -+ -+// Declaration and definition of StubGenerator (no .hpp file). -+// For a more detailed description of the stub routine structure -+// see the comment in stubRoutines.hpp -+ -+#undef __ -+#define __ _masm-> -+ -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#else -+#define BLOCK_COMMENT(str) __ block_comment(str) -+#endif -+ -+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") -+ -+// Stub Code definitions -+ -+class StubGenerator: public StubCodeGenerator { -+ private: ++ // Scan over array at a for count oops, verifying each one. ++ // Preserves a and count, clobbers t0 and t1. ++ void verify_oop_array(size_t size, Register a, Register count, Register temp) { ++ Label loop, end; ++ __ mv(t1, zr); ++ __ slli(t0, count, exact_log2(size)); ++ __ bind(loop); ++ __ bgeu(t1, t0, end); + -+#ifdef PRODUCT -+#define inc_counter_np(counter) ((void)0) -+#else -+ void inc_counter_np_(int& counter) { -+ __ la(t1, ExternalAddress((address)&counter)); -+ __ lwu(t0, Address(t1, 0)); -+ __ addiw(t0, t0, 1); -+ __ sw(t0, Address(t1, 0)); ++ __ add(temp, a, t1); ++ if (size == (size_t)wordSize) { ++ __ ld(temp, Address(temp, 0)); ++ __ verify_oop(temp); ++ } else { ++ __ lwu(temp, Address(temp, 0)); ++ __ decode_heap_oop(temp); // calls verify_oop ++ } ++ __ add(t1, t1, size); ++ __ j(loop); ++ __ bind(end); + } -+#define inc_counter_np(counter) \ -+ BLOCK_COMMENT("inc_counter " #counter); \ -+ inc_counter_np_(counter); -+#endif + -+ // Call stubs are used to call Java from C -+ // + // Arguments: -+ // c_rarg0: call wrapper address address -+ // c_rarg1: result address -+ // c_rarg2: result type BasicType -+ // c_rarg3: method Method* -+ // c_rarg4: (interpreter) entry point address -+ // c_rarg5: parameters intptr_t* -+ // c_rarg6: parameter size (in words) int -+ // c_rarg7: thread Thread* -+ // -+ // There is no return from the stub itself as any Java result -+ // is written to result -+ // -+ // we save x1 (ra) as the return PC at the base of the frame and -+ // link x8 (fp) below it as the frame pointer installing sp (x2) -+ // into fp. -+ // -+ // we save x10-x17, which accounts for all the c arguments. -+ // -+ // TODO: strictly do we need to save them all? they are treated as -+ // volatile by C so could we omit saving the ones we are going to -+ // place in global registers (thread? method?) or those we only use -+ // during setup of the Java call? -+ // -+ // we don't need to save x5 which C uses as an indirect result location -+ // return register. 
++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string + // -+ // we don't need to save x6-x7 and x28-x31 which both C and Java treat as -+ // volatile ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero + // -+ // we save x18-x27 which Java uses as temporary registers and C -+ // expects to be callee-save ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. + // -+ // so the stub frame looks like this when we enter Java code ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). + // -+ // [ return_from_Java ] <--- sp -+ // [ argument word n ] -+ // ... -+ // -22 [ argument word 1 ] -+ // -21 [ saved x27 ] <--- sp_after_call -+ // -20 [ saved x26 ] -+ // -19 [ saved x25 ] -+ // -18 [ saved x24 ] -+ // -17 [ saved x23 ] -+ // -16 [ saved x22 ] -+ // -15 [ saved x21 ] -+ // -14 [ saved x20 ] -+ // -13 [ saved x19 ] -+ // -12 [ saved x18 ] -+ // -11 [ saved x9 ] -+ // -10 [ call wrapper (x10) ] -+ // -9 [ result (x11) ] -+ // -8 [ result type (x12) ] -+ // -7 [ method (x13) ] -+ // -6 [ entry point (x14) ] -+ // -5 [ parameters (x15) ] -+ // -4 [ parameter size (x16) ] -+ // -3 [ thread (x17) ] -+ // -2 [ saved fp (x8) ] -+ // -1 [ saved ra (x1) ] -+ // 0 [ ] <--- fp == saved sp (x2) -+ -+ // Call stub stack layout word offsets from fp -+ enum call_stub_layout { -+ sp_after_call_off = -21, -+ -+ x27_off = -21, -+ x26_off = -20, -+ x25_off = -19, -+ x24_off = -18, -+ x23_off = -17, -+ x22_off = -16, -+ x21_off = -15, -+ x20_off = -14, -+ x19_off = -13, -+ x18_off = -12, -+ x9_off = -11, -+ -+ call_wrapper_off = -10, -+ result_off = -9, -+ result_type_off = -8, -+ method_off = -7, -+ entry_point_off = -6, -+ parameters_off = -5, -+ parameter_size_off = -4, -+ thread_off = -3, -+ fp_f = -2, -+ retaddr_off = -1, -+ }; -+ -+ address generate_call_stub(address& return_address) { -+ assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 && -+ (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, -+ "adjust this code"); -+ -+ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; ++ RegSet saved_reg = RegSet::of(s, d, count); ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); -+ -+ const Address sp_after_call (fp, sp_after_call_off * wordSize); -+ -+ const Address call_wrapper (fp, call_wrapper_off * wordSize); -+ const Address result (fp, result_off * wordSize); -+ const Address result_type (fp, result_type_off * wordSize); -+ const Address method (fp, method_off * wordSize); -+ const Address entry_point (fp, entry_point_off * wordSize); -+ const Address parameters (fp, parameters_off * wordSize); -+ const Address parameter_size(fp, parameter_size_off * wordSize); -+ -+ const Address thread (fp, thread_off * wordSize); -+ -+ const Address x27_save (fp, x27_off * wordSize); -+ const Address x26_save (fp, x26_off * wordSize); -+ const 
Address x25_save (fp, x25_off * wordSize); -+ const Address x24_save (fp, x24_off * wordSize); -+ const Address x23_save (fp, x23_off * wordSize); -+ const Address x22_save (fp, x22_off * wordSize); -+ const Address x21_save (fp, x21_off * wordSize); -+ const Address x20_save (fp, x20_off * wordSize); -+ const Address x19_save (fp, x19_off * wordSize); -+ const Address x18_save (fp, x18_off * wordSize); -+ -+ const Address x9_save (fp, x9_off * wordSize); -+ -+ // stub code -+ -+ address riscv_entry = __ pc(); -+ -+ // set up frame and move sp to end of save area + __ enter(); -+ __ addi(sp, fp, sp_after_call_off * wordSize); -+ -+ // save register parameters and Java temporary/global registers -+ // n.b. we save thread even though it gets installed in -+ // xthread because we want to sanity check tp later -+ __ sd(c_rarg7, thread); -+ __ sw(c_rarg6, parameter_size); -+ __ sd(c_rarg5, parameters); -+ __ sd(c_rarg4, entry_point); -+ __ sd(c_rarg3, method); -+ __ sd(c_rarg2, result_type); -+ __ sd(c_rarg1, result); -+ __ sd(c_rarg0, call_wrapper); -+ -+ __ sd(x9, x9_save); -+ -+ __ sd(x18, x18_save); -+ __ sd(x19, x19_save); -+ __ sd(x20, x20_save); -+ __ sd(x21, x21_save); -+ __ sd(x22, x22_save); -+ __ sd(x23, x23_save); -+ __ sd(x24, x24_save); -+ __ sd(x25, x25_save); -+ __ sd(x26, x26_save); -+ __ sd(x27, x27_save); -+ -+ // install Java thread in global register now we have saved -+ // whatever value it held -+ __ mv(xthread, c_rarg7); -+ -+ // And method -+ __ mv(xmethod, c_rarg3); -+ -+ // set up the heapbase register -+ __ reinit_heapbase(); + -+#ifdef ASSERT -+ // make sure we have no pending exceptions -+ { -+ Label L; -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ beqz(t0, L); -+ __ stop("StubRoutines::call_stub: entered with pending exception"); -+ __ BIND(L); ++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ BLOCK_COMMENT("Entry:"); + } -+#endif -+ // pass parameters if any -+ __ mv(esp, sp); -+ __ slli(t0, c_rarg6, LogBytesPerWord); -+ __ sub(t0, sp, t0); // Move SP out of the way -+ __ andi(sp, t0, -2 * wordSize); -+ -+ BLOCK_COMMENT("pass parameters if any"); -+ Label parameters_done; -+ // parameter count is still in c_rarg6 -+ // and parameter pointer identifying param 1 is in c_rarg5 -+ __ beqz(c_rarg6, parameters_done); + -+ address loop = __ pc(); -+ __ ld(t0, c_rarg5, 0); -+ __ addi(c_rarg5, c_rarg5, wordSize); -+ __ addi(c_rarg6, c_rarg6, -1); -+ __ push_reg(t0); -+ __ bgtz(c_rarg6, loop); -+ -+ __ BIND(parameters_done); -+ -+ // call Java entry -- passing methdoOop, and current sp -+ // xmethod: Method* -+ // x30: sender sp -+ BLOCK_COMMENT("call Java function"); -+ __ mv(x30, sp); -+ __ jalr(c_rarg4); -+ -+ // save current address for use by exception handling code -+ -+ return_address = __ pc(); -+ -+ // store result depending on type (everything that is not -+ // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) -+ // n.b. 
this assumes Java returns an integral result in x10 -+ // and a floating result in j_farg0 -+ __ ld(j_rarg2, result); -+ Label is_long, is_float, is_double, exit; -+ __ ld(j_rarg1, result_type); -+ __ li(t0, (u1)T_OBJECT); -+ __ beq(j_rarg1, t0, is_long); -+ __ li(t0, (u1)T_LONG); -+ __ beq(j_rarg1, t0, is_long); -+ __ li(t0, (u1)T_FLOAT); -+ __ beq(j_rarg1, t0, is_float); -+ __ li(t0, (u1)T_DOUBLE); -+ __ beq(j_rarg1, t0, is_double); -+ -+ // handle T_INT case -+ __ sw(x10, Address(j_rarg2)); -+ -+ __ BIND(exit); ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } + -+ // pop parameters -+ __ addi(esp, fp, sp_after_call_off * wordSize); ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_reg); + -+#ifdef ASSERT -+ // verify that threads correspond -+ { -+ Label L, S; -+ __ ld(t0, thread); -+ __ bne(xthread, t0, S); -+ __ get_thread(t0); -+ __ beq(xthread, t0, L); -+ __ BIND(S); -+ __ stop("StubRoutines::call_stub: threads must correspond"); -+ __ BIND(L); ++ if (is_oop) { ++ // save regs before copy_memory ++ __ push_reg(RegSet::of(d, count), sp); + } -+#endif + -+ // restore callee-save registers -+ __ ld(x27, x27_save); -+ __ ld(x26, x26_save); -+ __ ld(x25, x25_save); -+ __ ld(x24, x24_save); -+ __ ld(x23, x23_save); -+ __ ld(x22, x22_save); -+ __ ld(x21, x21_save); -+ __ ld(x20, x20_save); -+ __ ld(x19, x19_save); -+ __ ld(x18, x18_save); ++ copy_memory(aligned, s, d, count, t0, size); + -+ __ ld(x9, x9_save); ++ if (is_oop) { ++ __ pop_reg(RegSet::of(d, count), sp); ++ if (VerifyOops) { ++ verify_oop_array(size, d, count, t2); ++ } ++ } + -+ __ ld(c_rarg0, call_wrapper); -+ __ ld(c_rarg1, result); -+ __ ld(c_rarg2, result_type); -+ __ ld(c_rarg3, method); -+ __ ld(c_rarg4, entry_point); -+ __ ld(c_rarg5, parameters); -+ __ ld(c_rarg6, parameter_size); -+ __ ld(c_rarg7, thread); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); + -+ // leave frame and return to caller + __ leave(); ++ __ mv(x10, zr); // return 0 + __ ret(); -+ -+ // handle return types different from T_INT -+ -+ __ BIND(is_long); -+ __ sd(x10, Address(j_rarg2, 0)); -+ __ j(exit); -+ -+ __ BIND(is_float); -+ __ fsw(j_farg0, Address(j_rarg2, 0), t0); -+ __ j(exit); -+ -+ __ BIND(is_double); -+ __ fsd(j_farg0, Address(j_rarg2, 0), t0); -+ __ j(exit); -+ + return start; + } + -+ // Return point for a Java call if there's an exception thrown in -+ // Java code. The exception is caught and transformed into a -+ // pending exception stored in JavaThread that can be tested from -+ // within the VM. ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string + // -+ // Note: Usually the parameters are removed by the callee. In case -+ // of an exception crossing an activation frame boundary, that is -+ // not the case if the callee is compiled code => need to setup the -+ // sp. 
++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero + // -+ // x10: exception oop -+ -+ address generate_catch_exception() { -+ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target, ++ address* entry, const char* name, ++ bool dest_uninitialized = false) { ++ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; ++ RegSet saved_regs = RegSet::of(s, d, count); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); ++ __ enter(); + -+ // same as in generate_call_stub(): -+ const Address thread(fp, thread_off * wordSize); -+ -+#ifdef ASSERT -+ // verify that threads correspond -+ { -+ Label L, S; -+ __ ld(t0, thread); -+ __ bne(xthread, t0, S); -+ __ get_thread(t0); -+ __ beq(xthread, t0, L); -+ __ bind(S); -+ __ stop("StubRoutines::catch_exception: threads must correspond"); -+ __ bind(L); ++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ BLOCK_COMMENT("Entry:"); + } -+#endif + -+ // set pending exception -+ __ verify_oop(x10); ++ // use fwd copy when (d-s) above_equal (count*size) ++ __ sub(t0, d, s); ++ __ slli(t1, count, exact_log2(size)); ++ __ bgeu(t0, t1, nooverlap_target); + -+ __ sd(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ mv(t0, (address)__FILE__); -+ __ sd(t0, Address(xthread, Thread::exception_file_offset())); -+ __ mv(t0, (int)__LINE__); -+ __ sw(t0, Address(xthread, Thread::exception_line_offset())); ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } + -+ // complete return to VM -+ assert(StubRoutines::_call_stub_return_address != NULL, -+ "_call_stub_return_address must have been generated before"); -+ __ j(StubRoutines::_call_stub_return_address); ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_regs); + -+ return start; -+ } ++ if (is_oop) { ++ // save regs before copy_memory ++ __ push_reg(RegSet::of(d, count), sp); ++ } + -+ // Continuation point for runtime calls returning with a pending -+ // exception. The pending exception check happened in the runtime -+ // or native call stub. The pending exception in Thread is -+ // converted into a Java-level exception. -+ // -+ // Contract with Java-level exception handlers: -+ // x10: exception -+ // x13: throwing pc -+ // -+ // NOTE: At entry of this stub, exception-pc must be in RA !! -+ -+ // NOTE: this is always used as a jump target within generated code -+ // so it just needs to be generated code with no x86 prolog -+ -+ address generate_forward_exception() { -+ StubCodeMark mark(this, "StubRoutines", "forward exception"); -+ address start = __ pc(); -+ -+ // Upon entry, RA points to the return address returning into -+ // Java (interpreted or compiled) code; i.e., the return address -+ // becomes the throwing pc. 
-+ // -+ // Arguments pushed before the runtime call are still on the stack -+ // but the exception handler will reset the stack pointer -> -+ // ignore them. A potential result in registers can be ignored as -+ // well. -+ -+#ifdef ASSERT -+ // make sure this code is only executed if there is a pending exception -+ { -+ Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ bnez(t0, L); -+ __ stop("StubRoutines::forward exception: no pending exception (1)"); -+ __ bind(L); -+ } -+#endif -+ -+ // compute exception handler into x9 -+ -+ // call the VM to find the handler address associated with the -+ // caller address. pass thread in x10 and caller pc (ret address) -+ // in x11. n.b. the caller pc is in ra, unlike x86 where it is on -+ // the stack. -+ __ mv(c_rarg1, ra); -+ // ra will be trashed by the VM call so we move it to x9 -+ // (callee-saved) because we also need to pass it to the handler -+ // returned by this call. -+ __ mv(x9, ra); -+ BLOCK_COMMENT("call exception_handler_for_return_address"); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, -+ SharedRuntime::exception_handler_for_return_address), -+ xthread, c_rarg1); -+ // we should not really care that ra is no longer the callee -+ // address. we saved the value the handler needs in x9 so we can -+ // just copy it to x13. however, the C2 handler will push its own -+ // frame and then calls into the VM and the VM code asserts that -+ // the PC for the frame above the handler belongs to a compiled -+ // Java method. So, we restore ra here to satisfy that assert. -+ __ mv(ra, x9); -+ // setup x10 & x13 & clear pending exception -+ __ mv(x13, x9); -+ __ mv(x9, x10); -+ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); ++ copy_memory(aligned, s, d, count, t0, -size); + -+#ifdef ASSERT -+ // make sure exception is set -+ { -+ Label L; -+ __ bnez(x10, L); -+ __ stop("StubRoutines::forward exception: no pending exception (2)"); -+ __ bind(L); ++ if (is_oop) { ++ __ pop_reg(RegSet::of(d, count), sp); ++ if (VerifyOops) { ++ verify_oop_array(size, d, count, t2); ++ } + } -+#endif -+ -+ // continue at exception handler -+ // x10: exception -+ // x13: throwing pc -+ // x9: exception handler -+ __ verify_oop(x10); -+ __ jr(x9); -+ ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); ++ __ leave(); ++ __ mv(x10, zr); // return 0 ++ __ ret(); + return start; + } + -+ // Non-destructive plausibility checks for oops -+ // + // Arguments: -+ // x10: oop to verify -+ // t0: error message ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string + // -+ // Stack after saving c_rarg3: -+ // [tos + 0]: saved c_rarg3 -+ // [tos + 1]: saved c_rarg2 -+ // [tos + 2]: saved ra -+ // [tos + 3]: saved t1 -+ // [tos + 4]: saved x10 -+ // [tos + 5]: saved t0 -+ address generate_verify_oop() { -+ -+ StubCodeMark mark(this, "StubRoutines", "verify_oop"); -+ address start = __ pc(); -+ -+ Label exit, error; -+ -+ __ push_reg(0x3000, sp); // save c_rarg2 and c_rarg3 -+ -+ __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr())); -+ __ ld(c_rarg3, Address(c_rarg2)); -+ __ add(c_rarg3, c_rarg3, 1); -+ __ sd(c_rarg3, Address(c_rarg2)); -+ -+ // object is in x10 -+ // make sure object is 'reasonable' -+ __ beqz(x10, exit); // if obj is NULL it is OK -+ -+#if INCLUDE_ZGC -+ if (UseZGC) { -+ // Check if mask is good. 
-+ // verifies that ZAddressBadMask & x10 == 0 -+ __ ld(c_rarg3, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(c_rarg2, x10, c_rarg3); -+ __ bnez(c_rarg2, error); -+ } -+#endif ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jbyte), aligned, not_oop, entry, name); ++ } + -+ // Check if the oop is in the right area of memory -+ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); -+ __ andr(c_rarg2, x10, c_rarg3); -+ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_bits()); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_byte_copy(bool aligned, address nooverlap_target, ++ address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jbyte), aligned, not_oop, nooverlap_target, entry, name); ++ } + -+ // Compare c_rarg2 and c_rarg3. -+ __ bne(c_rarg2, c_rarg3, error); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_short_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_short_copy(). ++ // ++ address generate_disjoint_short_copy(bool aligned, ++ address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jshort), aligned, not_oop, entry, name); ++ } + -+ // make sure klass is 'reasonable', which is not zero. 
-+ __ load_klass(x10, x10); // get klass -+ __ beqz(x10, error); // if klass is NULL it is broken ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, address nooverlap_target, ++ address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jshort), aligned, not_oop, nooverlap_target, entry, name); ++ } + -+ // return if everything seems ok -+ __ bind(exit); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_int_copy(bool aligned, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name); ++ } + -+ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 -+ __ ret(); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. 
++ // ++ address generate_conjoint_int_copy(bool aligned, address nooverlap_target, ++ address* entry, const char* name, ++ bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jint), aligned, not_oop, nooverlap_target, entry, name); ++ } + -+ // handle errors -+ __ bind(error); -+ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 + -+ __ pusha(); -+ // debug(char* msg, int64_t pc, int64_t regs[]) -+ __ mv(c_rarg0, t0); // pass address of error message -+ __ mv(c_rarg1, ra); // pass return address -+ __ mv(c_rarg2, sp); // pass address of regs on stack -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif -+ BLOCK_COMMENT("call MacroAssembler::debug"); -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset); -+ __ jalr(x1, t0, offset); -+ __ ebreak(); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ // Side Effects: ++ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the ++ // no-overlap entry point used by generate_conjoint_long_oop_copy(). ++ // ++ address generate_disjoint_long_copy(bool aligned, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jlong), aligned, not_oop, entry, name); ++ } + -+ return start; ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ address generate_conjoint_long_copy(bool aligned, ++ address nooverlap_target, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jlong), aligned, not_oop, nooverlap_target, entry, name); + } + -+ // The inner part of zero_words(). ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string + // + // Inputs: -+ // x28: the HeapWord-aligned base address of an array to zero. -+ // x29: the count in HeapWords, x29 > 0. ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero + // -+ // Returns x28 and x29, adjusted for the caller to clear. -+ // x28: the base address of the tail of words left to clear. -+ // x29: the number of words in the tail. -+ // x29 < MacroAssembler::zero_words_block_size. ++ // Side Effects: ++ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the ++ // no-overlap entry point used by generate_conjoint_long_oop_copy(). ++ // ++ address generate_disjoint_oop_copy(bool aligned, address* entry, ++ const char* name, bool dest_uninitialized) { ++ const bool is_oop = true; ++ const size_t size = UseCompressedOops ? 
sizeof (jint) : sizeof (jlong); ++ return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized); ++ } + -+ address generate_zero_blocks() { -+ Label done; ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ address generate_conjoint_oop_copy(bool aligned, ++ address nooverlap_target, address* entry, ++ const char* name, bool dest_uninitialized) { ++ const bool is_oop = true; ++ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); ++ return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, ++ name, dest_uninitialized); ++ } + -+ const Register base = x28, cnt = x29; ++ // Helper for generating a dynamic type check. ++ // Smashes t0, t1. ++ void generate_type_check(Register sub_klass, ++ Register super_check_offset, ++ Register super_klass, ++ Label& L_success) { ++ assert_different_registers(sub_klass, super_check_offset, super_klass); + -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "zero_blocks"); -+ address start = __ pc(); ++ BLOCK_COMMENT("type_check:"); + -+ { -+ // Clear the remaining blocks. -+ Label loop; -+ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); -+ __ bltz(cnt, done); -+ __ bind(loop); -+ for (int i = 0; i < MacroAssembler::zero_words_block_size; i++) { -+ __ sd(zr, Address(base, 0)); -+ __ add(base, base, 8); -+ } -+ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); -+ __ bgez(cnt, loop); -+ __ bind(done); -+ __ add(cnt, cnt, MacroAssembler::zero_words_block_size); -+ } ++ Label L_miss; + -+ __ ret(); ++ __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, super_check_offset); ++ __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL); + -+ return start; ++ // Fall through on failure! ++ __ BIND(L_miss); + } + -+ typedef enum { -+ copy_forwards = 1, -+ copy_backwards = -1 -+ } copy_direction; -+ -+ // Bulk copy of blocks of 8 words. -+ // -+ // count is a count of words. + // -+ // Precondition: count >= 8 -+ // -+ // Postconditions: ++ // Generate checkcasting array copy stub + // -+ // The least significant bit of count contains the remaining count -+ // of words to copy. The rest of count is trash. 
++ // Input: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // c_rarg3 - size_t ckoff (super_check_offset) ++ // c_rarg4 - oop ckval (super_klass) + // -+ // s and d are adjusted to point to the remaining words to copy ++ // Output: ++ // x10 == 0 - success ++ // x10 == -1^K - failure, where K is partial transfer count + // -+ void generate_copy_longs(Label &start, Register s, Register d, Register count, -+ copy_direction direction) { -+ int unit = wordSize * direction; -+ int bias = wordSize; ++ address generate_checkcast_copy(const char* name, address* entry, ++ bool dest_uninitialized = false) { ++ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; + -+ const Register tmp_reg0 = x13, tmp_reg1 = x14, tmp_reg2 = x15, tmp_reg3 = x16, -+ tmp_reg4 = x17, tmp_reg5 = x7, tmp_reg6 = x28, tmp_reg7 = x29; ++ // Input registers (after setup_arg_regs) ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register count = c_rarg2; // elementscount ++ const Register ckoff = c_rarg3; // super_check_offset ++ const Register ckval = c_rarg4; // super_klass + -+ const Register stride = x30; ++ RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4); ++ RegSet wb_post_saved_regs = RegSet::of(count); + -+ assert_different_registers(t0, tmp_reg0, tmp_reg1, tmp_reg2, tmp_reg3, -+ tmp_reg4, tmp_reg5, tmp_reg6, tmp_reg7); -+ assert_different_registers(s, d, count, t0); ++ // Registers used as temps (x7, x9, x18 are save-on-entry) ++ const Register count_save = x19; // orig elementscount ++ const Register start_to = x18; // destination array start address ++ const Register copied_oop = x7; // actual oop copied ++ const Register r9_klass = x9; // oop._klass ++ ++ //--------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the two arrays are subtypes of Object[] but the ++ // destination array type is not equal to or a supertype ++ // of the source type. Each element must be separately ++ // checked. ++ ++ assert_different_registers(from, to, count, ckoff, ckval, start_to, ++ copied_oop, r9_klass, count_save); + -+ Label again, drain; -+ const char* stub_name = NULL; -+ if (direction == copy_forwards) { -+ stub_name = "forward_copy_longs"; -+ } else { -+ stub_name = "backward_copy_longs"; -+ } -+ StubCodeMark mark(this, "StubRoutines", stub_name); + __ align(CodeEntryAlignment); -+ __ bind(start); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); + -+ if (direction == copy_forwards) { -+ __ sub(s, s, bias); -+ __ sub(d, d, bias); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ // Caller of this entry point must set up the argument registers. ++ if (entry != NULL) { ++ *entry = __ pc(); ++ BLOCK_COMMENT("Entry:"); + } + -+#ifdef ASSERT -+ // Make sure we are never given < 8 words -+ { -+ Label L; ++ // Empty array: Nothing to do ++ __ beqz(count, L_done); + -+ __ li(t0, 8); -+ __ bge(count, t0, L); -+ __ stop("genrate_copy_longs called with < 8 words"); ++ __ push_reg(RegSet::of(x7, x9, x18, x19), sp); ++ ++#ifdef ASSERT ++ BLOCK_COMMENT("assert consistent ckoff/ckval"); ++ // The ckoff and ckval must be mutually consistent, ++ // even though caller generates both. 
++ { Label L; ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ lwu(start_to, Address(ckval, sco_offset)); ++ __ beq(ckoff, start_to, L); ++ __ stop("super_check_offset inconsistent"); + __ bind(L); + } -+#endif -+ -+ __ ld(tmp_reg0, Address(s, 1 * unit)); -+ __ ld(tmp_reg1, Address(s, 2 * unit)); -+ __ ld(tmp_reg2, Address(s, 3 * unit)); -+ __ ld(tmp_reg3, Address(s, 4 * unit)); -+ __ ld(tmp_reg4, Address(s, 5 * unit)); -+ __ ld(tmp_reg5, Address(s, 6 * unit)); -+ __ ld(tmp_reg6, Address(s, 7 * unit)); -+ __ ld(tmp_reg7, Address(s, 8 * unit)); -+ __ addi(s, s, 8 * unit); ++#endif //ASSERT + -+ __ sub(count, count, 16); -+ __ bltz(count, drain); -+ -+ __ bind(again); ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; ++ bool is_oop = true; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } + -+ __ sd(tmp_reg0, Address(d, 1 * unit)); -+ __ sd(tmp_reg1, Address(d, 2 * unit)); -+ __ sd(tmp_reg2, Address(d, 3 * unit)); -+ __ sd(tmp_reg3, Address(d, 4 * unit)); -+ __ sd(tmp_reg4, Address(d, 5 * unit)); -+ __ sd(tmp_reg5, Address(d, 6 * unit)); -+ __ sd(tmp_reg6, Address(d, 7 * unit)); -+ __ sd(tmp_reg7, Address(d, 8 * unit)); ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs); + -+ __ ld(tmp_reg0, Address(s, 1 * unit)); -+ __ ld(tmp_reg1, Address(s, 2 * unit)); -+ __ ld(tmp_reg2, Address(s, 3 * unit)); -+ __ ld(tmp_reg3, Address(s, 4 * unit)); -+ __ ld(tmp_reg4, Address(s, 5 * unit)); -+ __ ld(tmp_reg5, Address(s, 6 * unit)); -+ __ ld(tmp_reg6, Address(s, 7 * unit)); -+ __ ld(tmp_reg7, Address(s, 8 * unit)); ++ // save the original count ++ __ mv(count_save, count); + -+ __ addi(s, s, 8 * unit); -+ __ addi(d, d, 8 * unit); ++ // Copy from low to high addresses ++ __ mv(start_to, to); // Save destination array start address ++ __ j(L_load_element); + -+ __ sub(count, count, 8); -+ __ bgez(count, again); ++ // ======== begin loop ======== ++ // (Loop is rotated; its entry is L_load_element.) ++ // Loop control: ++ // for count to 0 do ++ // copied_oop = load_heap_oop(from++) ++ // ... generate_type_check ... ++ // store_heap_oop(to++, copied_oop) ++ // end + -+ // Drain -+ __ bind(drain); ++ __ align(OptoLoopAlignment); + -+ __ sd(tmp_reg0, Address(d, 1 * unit)); -+ __ sd(tmp_reg1, Address(d, 2 * unit)); -+ __ sd(tmp_reg2, Address(d, 3 * unit)); -+ __ sd(tmp_reg3, Address(d, 4 * unit)); -+ __ sd(tmp_reg4, Address(d, 5 * unit)); -+ __ sd(tmp_reg5, Address(d, 6 * unit)); -+ __ sd(tmp_reg6, Address(d, 7 * unit)); -+ __ sd(tmp_reg7, Address(d, 8 * unit)); -+ __ addi(d, d, 8 * unit); ++ __ BIND(L_store_element); ++ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, AS_RAW); // store the oop ++ __ add(to, to, UseCompressedOops ? 4 : 8); ++ __ sub(count, count, 1); ++ __ beqz(count, L_do_card_marks); + -+ { -+ Label L1, L2; -+ __ andi(t0, count, 4); -+ __ beqz(t0, L1); ++ // ======== loop entry is here ======== ++ __ BIND(L_load_element); ++ __ load_heap_oop(copied_oop, Address(from, 0), noreg, noreg, AS_RAW); // load the oop ++ __ add(from, from, UseCompressedOops ? 
4 : 8); ++ __ beqz(copied_oop, L_store_element); + -+ __ ld(tmp_reg0, Address(s, 1 * unit)); -+ __ ld(tmp_reg1, Address(s, 2 * unit)); -+ __ ld(tmp_reg2, Address(s, 3 * unit)); -+ __ ld(tmp_reg3, Address(s, 4 * unit)); -+ __ addi(s, s, 4 * unit); ++ __ load_klass(r9_klass, copied_oop);// query the object klass ++ generate_type_check(r9_klass, ckoff, ckval, L_store_element); ++ // ======== end loop ======== + -+ __ sd(tmp_reg0, Address(d, 1 * unit)); -+ __ sd(tmp_reg1, Address(d, 2 * unit)); -+ __ sd(tmp_reg2, Address(d, 3 * unit)); -+ __ sd(tmp_reg3, Address(d, 4 * unit)); -+ __ addi(d, d, 4 * unit); ++ // It was a real error; we must depend on the caller to finish the job. ++ // Register count = remaining oops, count_orig = total oops. ++ // Emit GC store barriers for the oops we have copied and report ++ // their number to the caller. + -+ __ bind(L1); ++ __ sub(count, count_save, count); // K = partially copied oop count ++ __ xori(count, count, -1); // report (-1^K) to caller ++ __ beqz(count, L_done_pop); + -+ if (direction == copy_forwards) { -+ __ addi(s, s, bias); -+ __ addi(d, d, bias); -+ } ++ __ BIND(L_do_card_marks); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, t0, wb_post_saved_regs); + -+ __ andi(t0, count, 2); -+ __ beqz(t0, L2); -+ if (direction == copy_backwards) { -+ __ addi(s, s, 2 * unit); -+ __ ld(tmp_reg0, Address(s)); -+ __ ld(tmp_reg1, Address(s, wordSize)); -+ __ addi(d, d, 2 * unit); -+ __ sd(tmp_reg0, Address(d)); -+ __ sd(tmp_reg1, Address(d, wordSize)); -+ } else { -+ __ ld(tmp_reg0, Address(s)); -+ __ ld(tmp_reg1, Address(s, wordSize)); -+ __ addi(s, s, 2 * unit); -+ __ sd(tmp_reg0, Address(d)); -+ __ sd(tmp_reg1, Address(d, wordSize)); -+ __ addi(d, d, 2 * unit); -+ } -+ __ bind(L2); -+ } ++ __ bind(L_done_pop); ++ __ pop_reg(RegSet::of(x7, x9, x18, x19), sp); ++ inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); + ++ __ bind(L_done); ++ __ mv(x10, count); ++ __ leave(); + __ ret(); -+ } -+ -+ Label copy_f, copy_b; -+ -+ // All-singing all-dancing memory copy. -+ // -+ // Copy count units of memory from s to d. The size of a unit is -+ // step, which can be positive or negative depending on the direction -+ // of copy. If is_aligned is false, we align the source address. 
-+ // -+ /* -+ * if (is_aligned) { -+ * goto copy_8_bytes; -+ * } -+ * bool is_backwards = step < 0; -+ * int granularity = uabs(step); -+ * count = count * granularity; * count bytes -+ * -+ * if (is_backwards) { -+ * s += count; -+ * d += count; -+ * } -+ * -+ * count limit maybe greater than 16, for better performance -+ * if (count < 16) { -+ * goto copy_small; -+ * } -+ * -+ * if ((dst % 8) == (src % 8)) { -+ * aligned; -+ * goto copy8; -+ * } -+ * -+ * copy_small: -+ * load element one by one; -+ * done; -+ */ -+ -+ typedef void (MacroAssembler::*copy_insn)(Register Rd, const Address &adr, Register temp); -+ -+ void copy_memory_v(Register s, Register d, Register count, Register tmp, int step) { -+ bool is_backward = step < 0; -+ int granularity = uabs(step); -+ -+ const Register src = x30, dst = x31, vl = x14, cnt = x15, tmp1 = x16, tmp2 = x17; -+ assert_different_registers(s, d, cnt, vl, tmp, tmp1, tmp2); -+ Assembler::SEW sew = Assembler::elembytes_to_sew(granularity); -+ Label loop_forward, loop_backward, done; -+ -+ __ mv(dst, d); -+ __ mv(src, s); -+ __ mv(cnt, count); -+ -+ __ bind(loop_forward); -+ __ vsetvli(vl, cnt, sew, Assembler::m8); -+ if (is_backward) { -+ __ bne(vl, cnt, loop_backward); -+ } -+ -+ __ vlex_v(v0, src, sew); -+ __ sub(cnt, cnt, vl); -+ __ slli(vl, vl, (int)sew); -+ __ add(src, src, vl); -+ -+ __ vsex_v(v0, dst, sew); -+ __ add(dst, dst, vl); -+ __ bnez(cnt, loop_forward); -+ -+ if (is_backward) { -+ __ j(done); + -+ __ bind(loop_backward); -+ __ sub(tmp, cnt, vl); -+ __ slli(tmp, tmp, sew); -+ __ add(tmp1, s, tmp); -+ __ vlex_v(v0, tmp1, sew); -+ __ add(tmp2, d, tmp); -+ __ vsex_v(v0, tmp2, sew); -+ __ sub(cnt, cnt, vl); -+ __ bnez(cnt, loop_forward); -+ __ bind(done); -+ } ++ return start; + } + -+ void copy_memory(bool is_aligned, Register s, Register d, -+ Register count, Register tmp, int step) { -+ if (UseRVV) { -+ return copy_memory_v(s, d, count, tmp, step); -+ } -+ -+ bool is_backwards = step < 0; -+ int granularity = uabs(step); -+ -+ const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17; -+ -+ Label same_aligned; -+ Label copy8, copy_small, done; -+ -+ copy_insn ld_arr = NULL, st_arr = NULL; -+ switch (granularity) { -+ case 1 : -+ ld_arr = (copy_insn)&MacroAssembler::lbu; -+ st_arr = (copy_insn)&MacroAssembler::sb; -+ break; -+ case 2 : -+ ld_arr = (copy_insn)&MacroAssembler::lhu; -+ st_arr = (copy_insn)&MacroAssembler::sh; -+ break; -+ case 4 : -+ ld_arr = (copy_insn)&MacroAssembler::lwu; -+ st_arr = (copy_insn)&MacroAssembler::sw; -+ break; -+ case 8 : -+ ld_arr = (copy_insn)&MacroAssembler::ld; -+ st_arr = (copy_insn)&MacroAssembler::sd; -+ break; -+ default : -+ ShouldNotReachHere(); -+ } -+ -+ __ beqz(count, done); -+ __ slli(cnt, count, exact_log2(granularity)); -+ if (is_backwards) { -+ __ add(src, s, cnt); -+ __ add(dst, d, cnt); -+ } else { -+ __ mv(src, s); -+ __ mv(dst, d); -+ } -+ -+ if (is_aligned) { -+ __ addi(tmp, cnt, -8); -+ __ bgez(tmp, copy8); -+ __ j(copy_small); -+ } -+ -+ __ mv(tmp, 16); -+ __ blt(cnt, tmp, copy_small); -+ -+ __ xorr(tmp, src, dst); -+ __ andi(tmp, tmp, 0b111); -+ __ bnez(tmp, copy_small); -+ -+ __ bind(same_aligned); -+ __ andi(tmp, src, 0b111); -+ __ beqz(tmp, copy8); -+ if (is_backwards) { -+ __ addi(src, src, step); -+ __ addi(dst, dst, step); -+ } -+ (_masm->*ld_arr)(tmp3, Address(src), t0); -+ (_masm->*st_arr)(tmp3, Address(dst), t0); -+ if (!is_backwards) { -+ __ addi(src, src, step); -+ __ addi(dst, dst, step); -+ } -+ __ addi(cnt, cnt, -granularity); -+ __ beqz(cnt, done); -+ __ 
j(same_aligned); -+ -+ __ bind(copy8); -+ if (is_backwards) { -+ __ addi(src, src, -wordSize); -+ __ addi(dst, dst, -wordSize); -+ } -+ __ ld(tmp3, Address(src)); -+ __ sd(tmp3, Address(dst)); -+ if (!is_backwards) { -+ __ addi(src, src, wordSize); -+ __ addi(dst, dst, wordSize); -+ } -+ __ addi(cnt, cnt, -wordSize); -+ __ addi(tmp4, cnt, -8); -+ __ bgez(tmp4, copy8); // cnt >= 8, do next loop ++ // Perform range checks on the proposed arraycopy. ++ // Kills temp, but nothing else. ++ // Also, clean the sign bits of src_pos and dst_pos. ++ void arraycopy_range_checks(Register src, // source array oop (c_rarg0) ++ Register src_pos, // source position (c_rarg1) ++ Register dst, // destination array oo (c_rarg2) ++ Register dst_pos, // destination position (c_rarg3) ++ Register length, ++ Register temp, ++ Label& L_failed) { ++ BLOCK_COMMENT("arraycopy_range_checks:"); + -+ __ beqz(cnt, done); ++ assert_different_registers(t0, temp); + -+ __ bind(copy_small); -+ if (is_backwards) { -+ __ addi(src, src, step); -+ __ addi(dst, dst, step); -+ } -+ (_masm->*ld_arr)(tmp3, Address(src), t0); -+ (_masm->*st_arr)(tmp3, Address(dst), t0); -+ if (!is_backwards) { -+ __ addi(src, src, step); -+ __ addi(dst, dst, step); -+ } -+ __ addi(cnt, cnt, -granularity); -+ __ bgtz(cnt, copy_small); ++ // if [src_pos + length > arrayOop(src)->length()] then FAIL ++ __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes())); ++ __ addw(temp, length, src_pos); ++ __ bgtu(temp, t0, L_failed); + -+ __ bind(done); -+ } ++ // if [dst_pos + length > arrayOop(dst)->length()] then FAIL ++ __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes())); ++ __ addw(temp, length, dst_pos); ++ __ bgtu(temp, t0, L_failed); + -+ // Scan over array at a for count oops, verifying each one. -+ // Preserves a and count, clobbers t0 and t1. -+ void verify_oop_array(size_t size, Register a, Register count, Register temp) { -+ Label loop, end; -+ __ mv(t1, zr); -+ __ slli(t0, count, exact_log2(size)); -+ __ bind(loop); -+ __ bgeu(t1, t0, end); ++ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. ++ __ zero_extend(src_pos, src_pos, 32); ++ __ zero_extend(dst_pos, dst_pos, 32); + -+ __ add(temp, a, t1); -+ if (size == (size_t)wordSize) { -+ __ ld(temp, Address(temp, 0)); -+ __ verify_oop(temp); -+ } else { -+ __ lwu(temp, Address(temp, 0)); -+ __ decode_heap_oop(temp); // calls verify_oop -+ } -+ __ add(t1, t1, size); -+ __ j(loop); -+ __ bind(end); ++ BLOCK_COMMENT("arraycopy_range_checks done"); + } + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // is_oop - true => oop array, so generate store check code -+ // name - stub name string + // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // Generate 'unsafe' array copy stub ++ // Though just as safe as the other stubs, it takes an unscaled ++ // size_t argument instead of an element count. + // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomicly. ++ // Input: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - byte count, treated as ssize_t, can be zero + // -+ // Side Effects: -+ // disjoint_int_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_int_oop_copy(). 
++ // Examines the alignment of the operands and dispatches ++ // to a long, int, short, or byte copy loop. + // -+ address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address* entry, -+ const char* name, bool dest_uninitialized = false) { ++ address generate_unsafe_copy(const char* name, ++ address byte_copy_entry, ++ address short_copy_entry, ++ address int_copy_entry, ++ address long_copy_entry) { ++ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && ++ int_copy_entry != NULL && long_copy_entry != NULL); ++ Label L_long_aligned, L_int_aligned, L_short_aligned; + const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; -+ RegSet saved_reg = RegSet::of(s, d, count); ++ + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); -+ __ enter(); -+ -+ if (entry != NULL) { -+ *entry = __ pc(); -+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) -+ BLOCK_COMMENT("Entry:"); -+ } -+ -+ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; -+ if (dest_uninitialized) { -+ decorators |= IS_DEST_UNINITIALIZED; -+ } -+ if (aligned) { -+ decorators |= ARRAYCOPY_ALIGNED; -+ } -+ -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_reg); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame + -+ if (is_oop) { -+ // save regs before copy_memory -+ __ push_reg(RegSet::of(d, count), sp); -+ } ++ // bump this on entry, not on exit: ++ inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); + -+ { -+ // UnsafeCopyMemory page error: continue after ucm -+ bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); -+ UnsafeCopyMemoryMark ucmm(this, add_entry, true); -+ copy_memory(aligned, s, d, count, t0, size); -+ } ++ __ orr(t0, s, d); ++ __ orr(t0, t0, count); + -+ if (is_oop) { -+ __ pop_reg(RegSet::of(d, count), sp); -+ if (VerifyOops) { -+ verify_oop_array(size, d, count, t2); -+ } -+ } ++ __ andi(t0, t0, BytesPerLong - 1); ++ __ beqz(t0, L_long_aligned); ++ __ andi(t0, t0, BytesPerInt - 1); ++ __ beqz(t0, L_int_aligned); ++ __ andi(t0, t0, 1); ++ __ beqz(t0, L_short_aligned); ++ __ j(RuntimeAddress(byte_copy_entry)); + -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); ++ __ BIND(L_short_aligned); ++ __ srli(count, count, LogBytesPerShort); // size => short_count ++ __ j(RuntimeAddress(short_copy_entry)); ++ __ BIND(L_int_aligned); ++ __ srli(count, count, LogBytesPerInt); // size => int_count ++ __ j(RuntimeAddress(int_copy_entry)); ++ __ BIND(L_long_aligned); ++ __ srli(count, count, LogBytesPerLong); // size => long_count ++ __ j(RuntimeAddress(long_copy_entry)); + -+ __ leave(); -+ __ mv(x10, zr); // return 0 -+ __ ret(); + return start; + } + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // is_oop - true => oop array, so generate store check code -+ // name - stub name string + // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // Generate generic array copy stubs + // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomicly. 
++ // Input: ++ // c_rarg0 - src oop ++ // c_rarg1 - src_pos (32-bits) ++ // c_rarg2 - dst oop ++ // c_rarg3 - dst_pos (32-bits) ++ // c_rarg4 - element count (32-bits) + // -+ address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target, -+ address* entry, const char* name, -+ bool dest_uninitialized = false) { -+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; -+ RegSet saved_regs = RegSet::of(s, d, count); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); -+ __ enter(); ++ // Output: ++ // x10 == 0 - success ++ // x10 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_generic_copy(const char* name, ++ address byte_copy_entry, address short_copy_entry, ++ address int_copy_entry, address oop_copy_entry, ++ address long_copy_entry, address checkcast_copy_entry) { ++ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && ++ int_copy_entry != NULL && oop_copy_entry != NULL && ++ long_copy_entry != NULL && checkcast_copy_entry != NULL); ++ Label L_failed, L_failed_0, L_objArray; ++ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; + -+ if (entry != NULL) { -+ *entry = __ pc(); -+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) -+ BLOCK_COMMENT("Entry:"); -+ } ++ // Input registers ++ const Register src = c_rarg0; // source array oop ++ const Register src_pos = c_rarg1; // source position ++ const Register dst = c_rarg2; // destination array oop ++ const Register dst_pos = c_rarg3; // destination position ++ const Register length = c_rarg4; + -+ // use fwd copy when (d-s) above_equal (count*size) -+ __ sub(t0, d, s); -+ __ slli(t1, count, exact_log2(size)); -+ __ bgeu(t0, t1, nooverlap_target); ++ // Registers used as temps ++ const Register dst_klass = c_rarg5; + -+ DecoratorSet decorators = IN_HEAP | IS_ARRAY; -+ if (dest_uninitialized) { -+ decorators |= IS_DEST_UNINITIALIZED; -+ } -+ if (aligned) { -+ decorators |= ARRAYCOPY_ALIGNED; -+ } ++ __ align(CodeEntryAlignment); + -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_regs); ++ StubCodeMark mark(this, "StubRoutines", name); + -+ if (is_oop) { -+ // save regs before copy_memory -+ __ push_reg(RegSet::of(d, count), sp); -+ } -+ -+ { -+ // UnsafeCopyMemory page error: continue after ucm -+ bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); -+ UnsafeCopyMemoryMark ucmm(this, add_entry, true); -+ copy_memory(aligned, s, d, count, t0, -size); -+ } -+ -+ if (is_oop) { -+ __ pop_reg(RegSet::of(d, count), sp); -+ if (VerifyOops) { -+ verify_oop_array(size, d, count, t2); -+ } -+ } -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); -+ __ leave(); -+ __ mv(x10, zr); // return 0 -+ __ ret(); -+ return start; -+ } ++ address start = __ pc(); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, -+ // we let the hardware handle it. The one to eight bytes within words, -+ // dwords or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. 
-+ // -+ // Side Effects: -+ // disjoint_byte_copy_entry is set to the no-overlap entry point // -+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, -+ // we let the hardware handle it. The one to eight bytes within words, -+ // dwords or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ // Side Effects: -+ // disjoint_byte_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_byte_copy(). -+ // -+ address generate_disjoint_byte_copy(bool aligned, address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jbyte), aligned, not_oop, entry, name); -+ } ++ __ enter(); // required for proper stackwalking of RuntimeStub frame + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, -+ // we let the hardware handle it. The one to eight bytes within words, -+ // dwords or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ address generate_conjoint_byte_copy(bool aligned, address nooverlap_target, -+ address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jbyte), aligned, not_oop, nooverlap_target, entry, name); -+ } ++ // bump this on entry, not on exit: ++ inc_counter_np(SharedRuntime::_generic_array_copy_ctr); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we -+ // let the hardware handle it. The two or four words within dwords -+ // or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ // Side Effects: -+ // disjoint_short_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_short_copy(). -+ // -+ address generate_disjoint_short_copy(bool aligned, -+ address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jshort), aligned, not_oop, entry, name); -+ } ++ //----------------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the following conditions are met: ++ // ++ // (1) src and dst must not be null. ++ // (2) src_pos must not be negative. ++ // (3) dst_pos must not be negative. ++ // (4) length must not be negative. ++ // (5) src klass and dst klass should be the same and not NULL. ++ // (6) src and dst should be arrays. ++ // (7) src_pos + length must not exceed length of src. ++ // (8) dst_pos + length must not exceed length of dst. 
++ // + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we -+ // let the hardware handle it. The two or four words within dwords -+ // or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ address generate_conjoint_short_copy(bool aligned, address nooverlap_target, -+ address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jshort), aligned, not_oop, nooverlap_target, entry, name); -+ } ++ // if [src == NULL] then return -1 ++ __ beqz(src, L_failed); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomicly. -+ // -+ // Side Effects: -+ // disjoint_int_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_int_oop_copy(). -+ // -+ address generate_disjoint_int_copy(bool aligned, address* entry, -+ const char* name, bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name); -+ } ++ // if [src_pos < 0] then return -1 ++ // i.e. sign bit set ++ __ andi(t0, src_pos, 1UL << 31); ++ __ bnez(t0, L_failed); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomicly. -+ // -+ address generate_conjoint_int_copy(bool aligned, address nooverlap_target, -+ address* entry, const char* name, -+ bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jint), aligned, not_oop, nooverlap_target, entry, name); -+ } ++ // if [dst == NULL] then return -1 ++ __ beqz(dst, L_failed); + ++ // if [dst_pos < 0] then return -1 ++ // i.e. sign bit set ++ __ andi(t0, dst_pos, 1UL << 31); ++ __ bnez(t0, L_failed); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ // Side Effects: -+ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the -+ // no-overlap entry point used by generate_conjoint_long_oop_copy(). 
-+ // -+ address generate_disjoint_long_copy(bool aligned, address* entry, -+ const char* name, bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jlong), aligned, not_oop, entry, name); -+ } ++ // registers used as temp ++ const Register scratch_length = x28; // elements count to copy ++ const Register scratch_src_klass = x29; // array klass ++ const Register lh = x30; // layout helper + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ address generate_conjoint_long_copy(bool aligned, -+ address nooverlap_target, address* entry, -+ const char* name, bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jlong), aligned, not_oop, nooverlap_target, entry, name); -+ } ++ // if [length < 0] then return -1 ++ __ addw(scratch_length, length, zr); // length (elements count, 32-bits value) ++ // i.e. sign bit set ++ __ andi(t0, scratch_length, 1UL << 31); ++ __ bnez(t0, L_failed); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ // Side Effects: -+ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the -+ // no-overlap entry point used by generate_conjoint_long_oop_copy(). -+ // -+ address generate_disjoint_oop_copy(bool aligned, address* entry, -+ const char* name, bool dest_uninitialized) { -+ const bool is_oop = true; -+ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); -+ return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized); -+ } ++ __ load_klass(scratch_src_klass, src); ++#ifdef ASSERT ++ { ++ BLOCK_COMMENT("assert klasses not null {"); ++ Label L1, L2; ++ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL ++ __ bind(L1); ++ __ stop("broken null klass"); ++ __ bind(L2); ++ __ load_klass(t0, dst); ++ __ beqz(t0, L1); // this would be broken also ++ BLOCK_COMMENT("} assert klasses not null done"); ++ } ++#endif + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ address generate_conjoint_oop_copy(bool aligned, -+ address nooverlap_target, address* entry, -+ const char* name, bool dest_uninitialized) { -+ const bool is_oop = true; -+ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); -+ return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, -+ name, dest_uninitialized); -+ } ++ // Load layout helper (32-bits) ++ // ++ // |array_tag| | header_size | element_type | |log2_element_size| ++ // 32 30 24 16 8 2 0 ++ // ++ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 ++ // + -+ // Helper for generating a dynamic type check. -+ // Smashes t0, t1. 
-+ void generate_type_check(Register sub_klass, -+ Register super_check_offset, -+ Register super_klass, -+ Label& L_success) { -+ assert_different_registers(sub_klass, super_check_offset, super_klass); ++ const int lh_offset = in_bytes(Klass::layout_helper_offset()); + -+ BLOCK_COMMENT("type_check:"); ++ // Handle objArrays completely differently... ++ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ lw(lh, Address(scratch_src_klass, lh_offset)); ++ __ mvw(t0, objArray_lh); ++ __ beq(lh, t0, L_objArray); + -+ Label L_miss; ++ // if [src->klass() != dst->klass()] then return -1 ++ __ load_klass(t1, dst); ++ __ bne(t1, scratch_src_klass, L_failed); + -+ __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, super_check_offset); -+ __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL); ++ // if [src->is_Array() != NULL] then return -1 ++ // i.e. (lh >= 0) ++ __ andi(t0, lh, 1UL << 31); ++ __ beqz(t0, L_failed); + -+ // Fall through on failure! -+ __ BIND(L_miss); -+ } ++ // At this point, it is known to be a typeArray (array_tag 0x3). ++#ifdef ASSERT ++ { ++ BLOCK_COMMENT("assert primitive array {"); ++ Label L; ++ __ mvw(t1, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift); ++ __ bge(lh, t1, L); ++ __ stop("must be a primitive array"); ++ __ bind(L); ++ BLOCK_COMMENT("} assert primitive array done"); ++ } ++#endif + -+ // -+ // Generate checkcasting array copy stub -+ // -+ // Input: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // c_rarg3 - size_t ckoff (super_check_offset) -+ // c_rarg4 - oop ckval (super_klass) -+ // -+ // Output: -+ // x10 == 0 - success -+ // x10 == -1^K - failure, where K is partial transfer count -+ // -+ address generate_checkcast_copy(const char* name, address* entry, -+ bool dest_uninitialized = false) { -+ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, ++ t1, L_failed); + -+ // Input registers (after setup_arg_regs) -+ const Register from = c_rarg0; // source array address -+ const Register to = c_rarg1; // destination array address -+ const Register count = c_rarg2; // elementscount -+ const Register ckoff = c_rarg3; // super_check_offset -+ const Register ckval = c_rarg4; // super_klass ++ // TypeArrayKlass ++ // ++ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize) ++ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize) ++ // + -+ RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4); -+ RegSet wb_post_saved_regs = RegSet::of(count); ++ const Register t0_offset = t0; // array offset ++ const Register x22_elsize = lh; // element size + -+ // Registers used as temps (x7, x9, x18 are save-on-entry) -+ const Register count_save = x19; // orig elementscount -+ const Register start_to = x18; // destination array start address -+ const Register copied_oop = x7; // actual oop copied -+ const Register r9_klass = x9; // oop._klass ++ // Get array_header_in_bytes() ++ int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); ++ int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; ++ __ slli(t0_offset, lh, XLEN - lh_header_size_msb); // left shift to remove 24 ~ 32; ++ __ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset + -+ 
//--------------------------------------------------------------- -+ // Assembler stub will be used for this call to arraycopy -+ // if the two arrays are subtypes of Object[] but the -+ // destination array type is not equal to or a supertype -+ // of the source type. Each element must be separately -+ // checked. ++ __ add(src, src, t0_offset); // src array offset ++ __ add(dst, dst, t0_offset); // dst array offset ++ BLOCK_COMMENT("choose copy loop based on element size"); + -+ assert_different_registers(from, to, count, ckoff, ckval, start_to, -+ copied_oop, r9_klass, count_save); ++ // next registers should be set before the jump to corresponding stub ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register count = c_rarg2; // elements count + -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); ++ // 'from', 'to', 'count' registers should be set in such order ++ // since they are the same as 'src', 'src_pos', 'dst'. + -+ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); + -+ // Caller of this entry point must set up the argument registers. -+ if (entry != NULL) { -+ *entry = __ pc(); -+ BLOCK_COMMENT("Entry:"); -+ } ++ // The possible values of elsize are 0-3, i.e. exact_log2(element ++ // size in bytes). We do a simple bitwise binary search. ++ __ BIND(L_copy_bytes); ++ __ andi(t0, x22_elsize, 2); ++ __ bnez(t0, L_copy_ints); ++ __ andi(t0, x22_elsize, 1); ++ __ bnez(t0, L_copy_shorts); ++ __ add(from, src, src_pos); // src_addr ++ __ add(to, dst, dst_pos); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(byte_copy_entry)); + -+ // Empty array: Nothing to do -+ __ beqz(count, L_done); ++ __ BIND(L_copy_shorts); ++ __ shadd(from, src_pos, src, t0, 1); // src_addr ++ __ shadd(to, dst_pos, dst, t0, 1); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(short_copy_entry)); + -+ __ push_reg(RegSet::of(x7, x9, x18, x19), sp); ++ __ BIND(L_copy_ints); ++ __ andi(t0, x22_elsize, 1); ++ __ bnez(t0, L_copy_longs); ++ __ shadd(from, src_pos, src, t0, 2); // src_addr ++ __ shadd(to, dst_pos, dst, t0, 2); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(int_copy_entry)); + ++ __ BIND(L_copy_longs); +#ifdef ASSERT -+ BLOCK_COMMENT("assert consistent ckoff/ckval"); -+ // The ckoff and ckval must be mutually consistent, -+ // even though caller generates both. 
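
The checkcast copy stub above reports success as 0 and failure as -1^K, where K is the number of elements copied before the failing type check. A minimal caller-side decode, as a C++ sketch with invented function names:

    #include <cstdio>

    // 0 means every element was copied; any other value is -1^K, where K is the
    // count of elements copied before the element-wise type check failed.
    static bool copy_failed(int stub_result)   { return stub_result != 0; }
    static int  partial_count(int stub_result) { return stub_result ^ -1; } // == ~stub_result

    int main() {
      int failed_after_5 = -1 ^ 5;                      // what the stub would return
      printf("failed=%d\n", copy_failed(0));            // 0: full copy
      printf("K=%d\n", partial_count(failed_after_5));  // 5
      return 0;
    }
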
-+ { Label L; -+ int sco_offset = in_bytes(Klass::super_check_offset_offset()); -+ __ lwu(start_to, Address(ckval, sco_offset)); -+ __ beq(ckoff, start_to, L); -+ __ stop("super_check_offset inconsistent"); ++ { ++ BLOCK_COMMENT("assert long copy {"); ++ Label L; ++ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> x22_elsize ++ __ addw(lh, lh, zr); ++ __ mvw(t0, LogBytesPerLong); ++ __ beq(x22_elsize, t0, L); ++ __ stop("must be long copy, but elsize is wrong"); + __ bind(L); ++ BLOCK_COMMENT("} assert long copy done"); + } -+#endif //ASSERT -+ -+ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; -+ bool is_oop = true; -+ if (dest_uninitialized) { -+ decorators |= IS_DEST_UNINITIALIZED; -+ } ++#endif ++ __ shadd(from, src_pos, src, t0, 3); // src_addr ++ __ shadd(to, dst_pos, dst, t0, 3); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(long_copy_entry)); + -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs); ++ // ObjArrayKlass ++ __ BIND(L_objArray); ++ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] + -+ // save the original count -+ __ mv(count_save, count); ++ Label L_plain_copy, L_checkcast_copy; ++ // test array classes for subtyping ++ __ load_klass(t2, dst); ++ __ bne(scratch_src_klass, t2, L_checkcast_copy); // usual case is exact equality + -+ // Copy from low to high addresses -+ __ mv(start_to, to); // Save destination array start address -+ __ j(L_load_element); ++ // Identically typed arrays can be copied without element-wise checks. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, ++ t1, L_failed); + -+ // ======== begin loop ======== -+ // (Loop is rotated; its entry is L_load_element.) -+ // Loop control: -+ // for count to 0 do -+ // copied_oop = load_heap_oop(from++) -+ // ... generate_type_check ... -+ // store_heap_oop(to++, copied_oop) -+ // end ++ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); ++ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); ++ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ addw(count, scratch_length, zr); // length ++ __ BIND(L_plain_copy); ++ __ j(RuntimeAddress(oop_copy_entry)); + -+ __ align(OptoLoopAlignment); ++ __ BIND(L_checkcast_copy); ++ // live at this point: scratch_src_klass, scratch_length, t2 (dst_klass) ++ { ++ // Before looking at dst.length, make sure dst is also an objArray. ++ __ lwu(t0, Address(t2, lh_offset)); ++ __ mvw(t1, objArray_lh); ++ __ bne(t0, t1, L_failed); + -+ __ BIND(L_store_element); -+ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, AS_RAW); // store the oop -+ __ add(to, to, UseCompressedOops ? 4 : 8); -+ __ sub(count, count, 1); -+ __ beqz(count, L_do_card_marks); ++ // It is safe to examine both src.length and dst.length. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, ++ t2, L_failed); + -+ // ======== loop entry is here ======== -+ __ BIND(L_load_element); -+ __ load_heap_oop(copied_oop, Address(from, 0), noreg, noreg, AS_RAW); // load the oop -+ __ add(from, from, UseCompressedOops ? 
4 : 8); -+ __ beqz(copied_oop, L_store_element); -+ -+ __ load_klass(r9_klass, copied_oop);// query the object klass -+ generate_type_check(r9_klass, ckoff, ckval, L_store_element); -+ // ======== end loop ======== -+ -+ // It was a real error; we must depend on the caller to finish the job. -+ // Register count = remaining oops, count_orig = total oops. -+ // Emit GC store barriers for the oops we have copied and report -+ // their number to the caller. -+ -+ __ sub(count, count_save, count); // K = partially copied oop count -+ __ xori(count, count, -1); // report (-1^K) to caller -+ __ beqz(count, L_done_pop); -+ -+ __ BIND(L_do_card_marks); -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, t0, wb_post_saved_regs); -+ -+ __ bind(L_done_pop); -+ __ pop_reg(RegSet::of(x7, x9, x18, x19), sp); -+ inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); -+ -+ __ bind(L_done); -+ __ mv(x10, count); -+ __ leave(); -+ __ ret(); -+ -+ return start; -+ } -+ -+ // Perform range checks on the proposed arraycopy. -+ // Kills temp, but nothing else. -+ // Also, clean the sign bits of src_pos and dst_pos. -+ void arraycopy_range_checks(Register src, // source array oop (c_rarg0) -+ Register src_pos, // source position (c_rarg1) -+ Register dst, // destination array oo (c_rarg2) -+ Register dst_pos, // destination position (c_rarg3) -+ Register length, -+ Register temp, -+ Label& L_failed) { -+ BLOCK_COMMENT("arraycopy_range_checks:"); -+ -+ assert_different_registers(t0, temp); -+ -+ // if [src_pos + length > arrayOop(src)->length()] then FAIL -+ __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes())); -+ __ addw(temp, length, src_pos); -+ __ bgtu(temp, t0, L_failed); -+ -+ // if [dst_pos + length > arrayOop(dst)->length()] then FAIL -+ __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes())); -+ __ addw(temp, length, dst_pos); -+ __ bgtu(temp, t0, L_failed); -+ -+ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. -+ __ zero_extend(src_pos, src_pos, 32); -+ __ zero_extend(dst_pos, dst_pos, 32); -+ -+ BLOCK_COMMENT("arraycopy_range_checks done"); -+ } -+ -+ // -+ // Generate 'unsafe' array copy stub -+ // Though just as safe as the other stubs, it takes an unscaled -+ // size_t argument instead of an element count. -+ // -+ // Input: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - byte count, treated as ssize_t, can be zero -+ // -+ // Examines the alignment of the operands and dispatches -+ // to a long, int, short, or byte copy loop. 
-+ // -+ address generate_unsafe_copy(const char* name, -+ address byte_copy_entry, -+ address short_copy_entry, -+ address int_copy_entry, -+ address long_copy_entry) { -+ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && -+ int_copy_entry != NULL && long_copy_entry != NULL); -+ Label L_long_aligned, L_int_aligned, L_short_aligned; -+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; -+ -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); -+ __ enter(); // required for proper stackwalking of RuntimeStub frame -+ -+ // bump this on entry, not on exit: -+ inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); -+ -+ __ orr(t0, s, d); -+ __ orr(t0, t0, count); -+ -+ __ andi(t0, t0, BytesPerLong - 1); -+ __ beqz(t0, L_long_aligned); -+ __ andi(t0, t0, BytesPerInt - 1); -+ __ beqz(t0, L_int_aligned); -+ __ andi(t0, t0, 1); -+ __ beqz(t0, L_short_aligned); -+ __ j(RuntimeAddress(byte_copy_entry)); -+ -+ __ BIND(L_short_aligned); -+ __ srli(count, count, LogBytesPerShort); // size => short_count -+ __ j(RuntimeAddress(short_copy_entry)); -+ __ BIND(L_int_aligned); -+ __ srli(count, count, LogBytesPerInt); // size => int_count -+ __ j(RuntimeAddress(int_copy_entry)); -+ __ BIND(L_long_aligned); -+ __ srli(count, count, LogBytesPerLong); // size => long_count -+ __ j(RuntimeAddress(long_copy_entry)); -+ -+ return start; -+ } -+ -+ // -+ // Generate generic array copy stubs -+ // -+ // Input: -+ // c_rarg0 - src oop -+ // c_rarg1 - src_pos (32-bits) -+ // c_rarg2 - dst oop -+ // c_rarg3 - dst_pos (32-bits) -+ // c_rarg4 - element count (32-bits) -+ // -+ // Output: -+ // x10 == 0 - success -+ // x10 == -1^K - failure, where K is partial transfer count -+ // -+ address generate_generic_copy(const char* name, -+ address byte_copy_entry, address short_copy_entry, -+ address int_copy_entry, address oop_copy_entry, -+ address long_copy_entry, address checkcast_copy_entry) { -+ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && -+ int_copy_entry != NULL && oop_copy_entry != NULL && -+ long_copy_entry != NULL && checkcast_copy_entry != NULL); -+ Label L_failed, L_failed_0, L_objArray; -+ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; -+ -+ // Input registers -+ const Register src = c_rarg0; // source array oop -+ const Register src_pos = c_rarg1; // source position -+ const Register dst = c_rarg2; // destination array oop -+ const Register dst_pos = c_rarg3; // destination position -+ const Register length = c_rarg4; -+ -+ // Registers used as temps -+ const Register dst_klass = c_rarg5; -+ -+ __ align(CodeEntryAlignment); -+ -+ StubCodeMark mark(this, "StubRoutines", name); -+ -+ address start = __ pc(); -+ -+ __ enter(); // required for proper stackwalking of RuntimeStub frame -+ -+ // bump this on entry, not on exit: -+ inc_counter_np(SharedRuntime::_generic_array_copy_ctr); -+ -+ //----------------------------------------------------------------------- -+ // Assembler stub will be used for this call to arraycopy -+ // if the following conditions are met: -+ // -+ // (1) src and dst must not be null. -+ // (2) src_pos must not be negative. -+ // (3) dst_pos must not be negative. -+ // (4) length must not be negative. -+ // (5) src klass and dst klass should be the same and not NULL. -+ // (6) src and dst should be arrays. -+ // (7) src_pos + length must not exceed length of src. -+ // (8) dst_pos + length must not exceed length of dst. 
-+ // -+ -+ // if [src == NULL] then return -1 -+ __ beqz(src, L_failed); -+ -+ // if [src_pos < 0] then return -1 -+ // i.e. sign bit set -+ __ andi(t0, src_pos, 1UL << 31); -+ __ bnez(t0, L_failed); -+ -+ // if [dst == NULL] then return -1 -+ __ beqz(dst, L_failed); -+ -+ // if [dst_pos < 0] then return -1 -+ // i.e. sign bit set -+ __ andi(t0, dst_pos, 1UL << 31); -+ __ bnez(t0, L_failed); -+ -+ // registers used as temp -+ const Register scratch_length = x28; // elements count to copy -+ const Register scratch_src_klass = x29; // array klass -+ const Register lh = x30; // layout helper -+ -+ // if [length < 0] then return -1 -+ __ addw(scratch_length, length, zr); // length (elements count, 32-bits value) -+ // i.e. sign bit set -+ __ andi(t0, scratch_length, 1UL << 31); -+ __ bnez(t0, L_failed); -+ -+ __ load_klass(scratch_src_klass, src); -+#ifdef ASSERT -+ { -+ BLOCK_COMMENT("assert klasses not null {"); -+ Label L1, L2; -+ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL -+ __ bind(L1); -+ __ stop("broken null klass"); -+ __ bind(L2); -+ __ load_klass(t0, dst); -+ __ beqz(t0, L1); // this would be broken also -+ BLOCK_COMMENT("} assert klasses not null done"); -+ } -+#endif -+ -+ // Load layout helper (32-bits) -+ // -+ // |array_tag| | header_size | element_type | |log2_element_size| -+ // 32 30 24 16 8 2 0 -+ // -+ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 -+ // -+ -+ const int lh_offset = in_bytes(Klass::layout_helper_offset()); -+ -+ // Handle objArrays completely differently... -+ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); -+ __ lw(lh, Address(scratch_src_klass, lh_offset)); -+ __ mvw(t0, objArray_lh); -+ __ beq(lh, t0, L_objArray); -+ -+ // if [src->klass() != dst->klass()] then return -1 -+ __ load_klass(t1, dst); -+ __ bne(t1, scratch_src_klass, L_failed); -+ -+ // if [src->is_Array() != NULL] then return -1 -+ // i.e. (lh >= 0) -+ __ andi(t0, lh, 1UL << 31); -+ __ beqz(t0, L_failed); -+ -+ // At this point, it is known to be a typeArray (array_tag 0x3). 
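
The numbered conditions listed above map onto the checks implemented here. Restated as a simplified, illustrative C++ predicate with made-up types (the objArray subtype case handled later in the stub is not modeled):

    #include <cstdint>
    #include <cstdio>

    // The stub returns -1 to its caller when any condition fails and lets the
    // runtime raise the proper exception. "klass" stands in for the array klass
    // pointer; condition (6), "both are arrays", is implied here by giving every
    // ArrayRef a klass and a length.
    struct ArrayRef {
      const void* klass;
      int32_t     length;
    };

    static bool generic_copy_allowed(const ArrayRef* src, int32_t src_pos,
                                     const ArrayRef* dst, int32_t dst_pos,
                                     int32_t length) {
      if (src == nullptr || dst == nullptr) return false;                  // (1)
      if (src_pos < 0 || dst_pos < 0 || length < 0) return false;          // (2)(3)(4)
      if (src->klass == nullptr || src->klass != dst->klass) return false; // (5)
      if ((int64_t)src_pos + length > src->length) return false;           // (7)
      if ((int64_t)dst_pos + length > dst->length) return false;           // (8)
      return true;
    }

    int main() {
      ArrayRef a = { &a, 10 };
      ArrayRef b = { &a, 4 };                                // same dummy klass
      printf("%d\n", generic_copy_allowed(&a, 0, &b, 0, 4)); // 1
      printf("%d\n", generic_copy_allowed(&a, 8, &b, 0, 4)); // 0: violates (7)
      return 0;
    }
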
-+#ifdef ASSERT -+ { -+ BLOCK_COMMENT("assert primitive array {"); -+ Label L; -+ __ mvw(t1, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift); -+ __ bge(lh, t1, L); -+ __ stop("must be a primitive array"); -+ __ bind(L); -+ BLOCK_COMMENT("} assert primitive array done"); -+ } -+#endif -+ -+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, -+ t1, L_failed); -+ -+ // TypeArrayKlass -+ // -+ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize) -+ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize) -+ // -+ -+ const Register t0_offset = t0; // array offset -+ const Register x22_elsize = lh; // element size -+ -+ // Get array_header_in_bytes() -+ int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); -+ int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; -+ __ slli(t0_offset, lh, XLEN - lh_header_size_msb); // left shift to remove 24 ~ 32; -+ __ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset -+ -+ __ add(src, src, t0_offset); // src array offset -+ __ add(dst, dst, t0_offset); // dst array offset -+ BLOCK_COMMENT("choose copy loop based on element size"); -+ -+ // next registers should be set before the jump to corresponding stub -+ const Register from = c_rarg0; // source array address -+ const Register to = c_rarg1; // destination array address -+ const Register count = c_rarg2; // elements count -+ -+ // 'from', 'to', 'count' registers should be set in such order -+ // since they are the same as 'src', 'src_pos', 'dst'. -+ -+ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); -+ -+ // The possible values of elsize are 0-3, i.e. exact_log2(element -+ // size in bytes). We do a simple bitwise binary search. 
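
The "bitwise binary search" mentioned above picks one of four copy loops from log2(element size) with two bit tests per path rather than a chain of compares. The branch structure that follows is equivalent to this C++ sketch (enum and function names invented for illustration):

    #include <cstdio>

    enum CopyKind { BYTE_COPY, SHORT_COPY, INT_COPY, LONG_COPY };

    // elsize_log2 is 0..3; test bit 1 first, then bit 0, matching the andi/bnez
    // chain through L_copy_bytes, L_copy_shorts, L_copy_ints and L_copy_longs.
    static CopyKind select_copy(int elsize_log2) {
      if ((elsize_log2 & 2) == 0) {
        return (elsize_log2 & 1) ? SHORT_COPY : BYTE_COPY;
      }
      return (elsize_log2 & 1) ? LONG_COPY : INT_COPY;
    }

    int main() {
      for (int i = 0; i < 4; i++) {
        printf("log2=%d -> kind=%d\n", i, select_copy(i));
      }
      return 0;
    }
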
-+ __ BIND(L_copy_bytes); -+ __ andi(t0, x22_elsize, 2); -+ __ bnez(t0, L_copy_ints); -+ __ andi(t0, x22_elsize, 1); -+ __ bnez(t0, L_copy_shorts); -+ __ add(from, src, src_pos); // src_addr -+ __ add(to, dst, dst_pos); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(byte_copy_entry)); -+ -+ __ BIND(L_copy_shorts); -+ __ shadd(from, src_pos, src, t0, 1); // src_addr -+ __ shadd(to, dst_pos, dst, t0, 1); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(short_copy_entry)); -+ -+ __ BIND(L_copy_ints); -+ __ andi(t0, x22_elsize, 1); -+ __ bnez(t0, L_copy_longs); -+ __ shadd(from, src_pos, src, t0, 2); // src_addr -+ __ shadd(to, dst_pos, dst, t0, 2); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(int_copy_entry)); -+ -+ __ BIND(L_copy_longs); -+#ifdef ASSERT -+ { -+ BLOCK_COMMENT("assert long copy {"); -+ Label L; -+ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> x22_elsize -+ __ addw(lh, lh, zr); -+ __ mvw(t0, LogBytesPerLong); -+ __ beq(x22_elsize, t0, L); -+ __ stop("must be long copy, but elsize is wrong"); -+ __ bind(L); -+ BLOCK_COMMENT("} assert long copy done"); -+ } -+#endif -+ __ shadd(from, src_pos, src, t0, 3); // src_addr -+ __ shadd(to, dst_pos, dst, t0, 3); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(long_copy_entry)); -+ -+ // ObjArrayKlass -+ __ BIND(L_objArray); -+ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] -+ -+ Label L_plain_copy, L_checkcast_copy; -+ // test array classes for subtyping -+ __ load_klass(t2, dst); -+ __ bne(scratch_src_klass, t2, L_checkcast_copy); // usual case is exact equality -+ -+ // Identically typed arrays can be copied without element-wise checks. -+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, -+ t1, L_failed); -+ -+ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); -+ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); -+ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); -+ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); -+ __ addw(count, scratch_length, zr); // length -+ __ BIND(L_plain_copy); -+ __ j(RuntimeAddress(oop_copy_entry)); -+ -+ __ BIND(L_checkcast_copy); -+ // live at this point: scratch_src_klass, scratch_length, t2 (dst_klass) -+ { -+ // Before looking at dst.length, make sure dst is also an objArray. -+ __ lwu(t0, Address(t2, lh_offset)); -+ __ mvw(t1, objArray_lh); -+ __ bne(t0, t1, L_failed); -+ -+ // It is safe to examine both src.length and dst.length. -+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, -+ t2, L_failed); -+ -+ __ load_klass(dst_klass, dst); // reload ++ __ load_klass(dst_klass, dst); // reload + + // Marshal the base address arguments now, freeing registers. 
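
The shadd/add pairs that follow marshal the from/to arguments: each computes base + header_offset + (index << log2_element_size). In C++ terms, as a sketch with assumed parameter names (LogBytesPerHeapOop is 2 with compressed oops and 3 otherwise):

    #include <cstddef>
    #include <cstdio>

    // Address of element 'pos' in an array whose first element starts
    // 'base_offset' bytes past the oop, with elements of 1 << log2_elem_size bytes.
    static inline void* element_address(void* array_oop, size_t base_offset,
                                        size_t pos, unsigned log2_elem_size) {
      return static_cast<char*>(array_oop) + base_offset + (pos << log2_elem_size);
    }

    int main() {
      char fake_array[64] = {0};
      // e.g. a 16-byte header and element 3 of a 4-byte-element array -> offset 28
      void* p = element_address(fake_array, 16, 3, 2);
      printf("offset=%ld\n", (long)(static_cast<char*>(p) - fake_array));
      return 0;
    }
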
+ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); @@ -48206,50 +43491,6 @@ index 00000000000..b3fdd04db1b + return entry; + } + -+ address generate_method_entry_barrier() { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier"); -+ -+ Label deoptimize_label; -+ -+ address start = __ pc(); -+ -+ __ set_last_Java_frame(sp, fp, ra, t0); -+ -+ __ enter(); -+ __ add(t1, sp, wordSize); -+ -+ __ sub(sp, sp, 4 * wordSize); -+ -+ __ push_call_clobbered_registers(); -+ -+ __ mv(c_rarg0, t1); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetNMethod::nmethod_stub_entry_barrier), 1); -+ -+ __ reset_last_Java_frame(true); -+ -+ __ mv(t0, x10); -+ -+ __ pop_call_clobbered_registers(); -+ -+ __ bnez(t0, deoptimize_label); -+ -+ __ leave(); -+ __ ret(); -+ -+ __ BIND(deoptimize_label); -+ -+ __ ld(t0, Address(sp, 0)); -+ __ ld(fp, Address(sp, wordSize)); -+ __ ld(ra, Address(sp, wordSize * 2)); -+ __ ld(t1, Address(sp, wordSize * 3)); -+ -+ __ mv(sp, t0); -+ __ jr(t1); -+ -+ return start; -+ } -+ + // x10 = result + // x11 = str1 + // x12 = cnt1 @@ -48686,111 +43927,6 @@ index 00000000000..b3fdd04db1b + + return entry; + } -+ -+ // Arguments: -+ // -+ // Input: -+ // c_rarg0 - newArr address -+ // c_rarg1 - oldArr address -+ // c_rarg2 - newIdx -+ // c_rarg3 - shiftCount -+ // c_rarg4 - numIter -+ // -+ address generate_bigIntegerLeftShift() { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker"); -+ address entry = __ pc(); -+ -+ Label loop, exit; -+ -+ Register newArr = c_rarg0; -+ Register oldArr = c_rarg1; -+ Register newIdx = c_rarg2; -+ Register shiftCount = c_rarg3; -+ Register numIter = c_rarg4; -+ -+ Register shiftRevCount = c_rarg5; -+ Register oldArrNext = t1; -+ -+ __ beqz(numIter, exit); -+ __ shadd(newArr, newIdx, newArr, t0, 2); -+ -+ __ li(shiftRevCount, 32); -+ __ sub(shiftRevCount, shiftRevCount, shiftCount); -+ -+ __ bind(loop); -+ __ addi(oldArrNext, oldArr, 4); -+ __ vsetvli(t0, numIter, Assembler::e32, Assembler::m4); -+ __ vle32_v(v0, oldArr); -+ __ vle32_v(v4, oldArrNext); -+ __ vsll_vx(v0, v0, shiftCount); -+ __ vsrl_vx(v4, v4, shiftRevCount); -+ __ vor_vv(v0, v0, v4); -+ __ vse32_v(v0, newArr); -+ __ sub(numIter, numIter, t0); -+ __ shadd(oldArr, t0, oldArr, t1, 2); -+ __ shadd(newArr, t0, newArr, t1, 2); -+ __ bnez(numIter, loop); -+ -+ __ bind(exit); -+ __ ret(); -+ -+ return entry; -+ } -+ -+ // Arguments: -+ // -+ // Input: -+ // c_rarg0 - newArr address -+ // c_rarg1 - oldArr address -+ // c_rarg2 - newIdx -+ // c_rarg3 - shiftCount -+ // c_rarg4 - numIter -+ // -+ address generate_bigIntegerRightShift() { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker"); -+ address entry = __ pc(); -+ -+ Label loop, exit; -+ -+ Register newArr = c_rarg0; -+ Register oldArr = c_rarg1; -+ Register newIdx = c_rarg2; -+ Register shiftCount = c_rarg3; -+ Register numIter = c_rarg4; -+ Register idx = numIter; -+ -+ Register shiftRevCount = c_rarg5; -+ Register oldArrNext = c_rarg6; -+ Register newArrCur = t0; -+ Register oldArrCur = t1; -+ -+ __ beqz(idx, exit); -+ __ shadd(newArr, newIdx, newArr, t0, 2); -+ -+ __ li(shiftRevCount, 32); -+ __ sub(shiftRevCount, shiftRevCount, shiftCount); -+ -+ __ bind(loop); -+ __ vsetvli(t0, idx, Assembler::e32, Assembler::m4); -+ __ sub(idx, idx, t0); -+ __ shadd(oldArrNext, idx, oldArr, t1, 2); -+ __ shadd(newArrCur, idx, newArr, t1, 2); -+ __ addi(oldArrCur, oldArrNext, 4); -+ __ vle32_v(v0, 
oldArrCur); -+ __ vle32_v(v4, oldArrNext); -+ __ vsrl_vx(v0, v0, shiftCount); -+ __ vsll_vx(v4, v4, shiftRevCount); -+ __ vor_vv(v0, v0, v4); -+ __ vse32_v(v0, newArrCur); -+ __ bnez(idx, loop); -+ -+ __ bind(exit); -+ __ ret(); -+ -+ return entry; -+ } +#endif + +#ifdef COMPILER2 @@ -49656,22 +44792,12 @@ index 00000000000..b3fdd04db1b + MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); + StubRoutines::_montgomerySquare = g.generate_square(); + } -+ -+ if (UseRVVForBigIntegerShiftIntrinsics) { -+ StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift(); -+ StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift(); -+ } +#endif + + generate_compare_long_strings(); + + generate_string_indexof_stubs(); + -+ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); -+ if (bs_nm != NULL) { -+ StubRoutines::riscv::_method_entry_barrier = generate_method_entry_barrier(); -+ } -+ + StubRoutines::riscv::set_completed(); + } + @@ -49687,20 +44813,15 @@ index 00000000000..b3fdd04db1b + ~StubGenerator() {} +}; // end class declaration + -+#define UCM_TABLE_MAX_ENTRIES 8 +void StubGenerator_generate(CodeBuffer* code, bool all) { -+ if (UnsafeCopyMemory::_table == NULL) { -+ UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); -+ } -+ + StubGenerator g(code, all); +} diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp new file mode 100644 -index 00000000000..395a2d338e4 +index 0000000000..9202d9ec4b --- /dev/null +++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -@@ -0,0 +1,58 @@ +@@ -0,0 +1,57 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. @@ -49756,15 +44877,14 @@ index 00000000000..395a2d338e4 +address StubRoutines::riscv::_string_indexof_linear_uu = NULL; +address StubRoutines::riscv::_string_indexof_linear_ul = NULL; +address StubRoutines::riscv::_large_byte_array_inflate = NULL; -+address StubRoutines::riscv::_method_entry_barrier = NULL; + +bool StubRoutines::riscv::_completed = false; diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp new file mode 100644 -index 00000000000..51f07819c33 +index 0000000000..0c9445e18a --- /dev/null +++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -@@ -0,0 +1,161 @@ +@@ -0,0 +1,155 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. @@ -49834,8 +44954,6 @@ index 00000000000..51f07819c33 + static address _string_indexof_linear_ul; + static address _large_byte_array_inflate; + -+ static address _method_entry_barrier; -+ + static bool _completed; + + public: @@ -49912,10 +45030,6 @@ index 00000000000..51f07819c33 + return _large_byte_array_inflate; + } + -+ static address method_entry_barrier() { -+ return _method_entry_barrier; -+ } -+ + static bool complete() { + return _completed; + } @@ -49928,10 +45042,10 @@ index 00000000000..51f07819c33 +#endif // CPU_RISCV_STUBROUTINES_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp new file mode 100644 -index 00000000000..6537b2dbd94 +index 0000000000..e639fa7e12 --- /dev/null +++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -0,0 +1,1794 @@ +@@ -0,0 +1,1833 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. @@ -49985,7 +45099,6 @@ index 00000000000..6537b2dbd94 +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" -+#include "utilities/powerOfTwo.hpp" +#include + +#ifndef PRODUCT @@ -50491,31 +45604,81 @@ index 00000000000..6537b2dbd94 +// +// xmethod: method +// -+void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) { ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { + Label done; + // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. -+ int increment = InvocationCounter::count_increment; -+ Label no_mdo; -+ if (ProfileInterpreter) { -+ // Are we profiling? -+ __ ld(x10, Address(xmethod, Method::method_data_offset())); -+ __ beqz(x10, no_mdo); -+ // Increment counter in the MDO -+ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); -+ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); -+ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); -+ __ j(done); ++ if (TieredCompilation) { ++ int increment = InvocationCounter::count_increment; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld(x10, Address(xmethod, Method::method_data_offset())); ++ __ beqz(x10, no_mdo); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); ++ __ j(done); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(t1, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(xmethod, t1, done); ++ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); ++ __ bind(done); ++ } else { // not TieredCompilation ++ const Address backedge_counter(t1, ++ MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset()); ++ const Address invocation_counter(t1, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ ++ __ get_method_counters(xmethod, t1, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into MethodData* ++ __ lwu(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); ++ __ addw(x11, x11, 1); ++ __ sw(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ lwu(x11, invocation_counter); ++ __ lwu(x10, backedge_counter); ++ ++ __ addw(x11, x11, InvocationCounter::count_increment); ++ __ andi(x10, x10, InvocationCounter::count_mask_value); ++ ++ __ sw(x11, invocation_counter); ++ __ addw(x10, x10, x11); // add both counters ++ ++ // profile_method is non-null only for interpreted method so ++ // profile_method != NULL == !native_call ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ 
__ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); ++ __ blt(x10, t1, *profile_method_continue); ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(t1, *profile_method); ++ } ++ ++ { ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); ++ __ bltu(x10, t1, done); ++ __ j(*overflow); ++ } ++ __ bind(done); + } -+ __ bind(no_mdo); -+ // Increment counter in MethodCounters -+ const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); -+ __ get_method_counters(xmethod, t1, done); -+ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); -+ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); -+ __ bind(done); +} + +void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { @@ -50699,9 +45862,18 @@ index 00000000000..6537b2dbd94 + __ sd(ProfileInterpreter ? t0 : zr, Address(sp, 6 * wordSize)); + + // Get mirror and store it in the frame as GC root for this Method* -+ __ load_mirror(t2, xmethod); -+ __ sd(zr, Address(sp, 5 * wordSize)); -+ __ sd(t2, Address(sp, 4 * wordSize)); ++#if INCLUDE_SHENANDOAHGC ++ if (UseShenandoahGC) { ++ __ load_mirror(x28, xmethod); ++ __ sd(zr, Address(sp, 5 * wordSize)); ++ __ sd(x28, Address(sp, 4 * wordSize)); ++ } else ++#endif ++ { ++ __ load_mirror(t2, xmethod); ++ __ sd(zr, Address(sp, 5 * wordSize)); ++ __ sd(t2, Address(sp, 4 * wordSize)); ++ } + + __ ld(xcpool, Address(xmethod, Method::const_offset())); + __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); @@ -50770,7 +45942,7 @@ index 00000000000..6537b2dbd94 + + address entry = __ pc(); + -+ const int referent_offset = java_lang_ref_Reference::referent_offset(); ++ const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "referent offset not initialized"); + + Label slow_path; @@ -50829,42 +46001,16 @@ index 00000000000..6537b2dbd94 +} + +void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { -+ // See more discussion in stackOverflow.hpp. -+ -+ const int shadow_zone_size = checked_cast(StackOverflow::stack_shadow_zone_size()); ++ // Bang each page in the shadow zone. We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. ++ const int n_shadow_pages = JavaThread::stack_shadow_zone_size() / os::vm_page_size(); ++ const int start_page = native_call ? 
n_shadow_pages : 1; + const int page_size = os::vm_page_size(); -+ const int n_shadow_pages = shadow_zone_size / page_size; -+ -+#ifdef ASSERT -+ Label L_good_limit; -+ __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); -+ __ bnez(t0, L_good_limit); -+ __ stop("shadow zone safe limit is not initialized"); -+ __ bind(L_good_limit); -+ -+ Label L_good_watermark; -+ __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); -+ __ bnez(t0, L_good_watermark); -+ __ stop("shadow zone growth watermark is not initialized"); -+ __ bind(L_good_watermark); -+#endif -+ -+ Label L_done; -+ -+ __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); -+ __ bgtu(sp, t0, L_done); -+ -+ for (int p = 1; p <= n_shadow_pages; p++) { -+ __ bang_stack_with_offset(p * page_size); ++ for (int pages = start_page; pages <= n_shadow_pages ; pages++) { ++ __ sub(t0, sp, pages * page_size); ++ __ sd(zr, Address(t0)); + } -+ -+ // Record the new watermark, but only if the update is above the safe limit. -+ // Otherwise, the next time around the check above would pass the safe limit. -+ __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); -+ __ bleu(sp, t0, L_done); -+ __ sd(sp, Address(xthread, JavaThread::shadow_zone_growth_watermark())); -+ -+ __ bind(L_done); +} + +// Interpreter stub for calling a native method. (asm interpreter) @@ -50929,7 +46075,7 @@ index 00000000000..6537b2dbd94 + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { -+ generate_counter_incr(&invocation_counter_overflow); ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); + } + + Label continue_after_compile; @@ -51094,16 +46240,7 @@ index 00000000000..6537b2dbd94 + // check for safepoint operation in progress and/or pending suspend requests + { + Label L, Continue; -+ -+ // We need an acquire here to ensure that any subsequent load of the -+ // global SafepointSynchronize::_state flag is ordered after this load -+ // of the thread-local polling word. We don't want this poll to -+ // return false (i.e. not safepointing) and a later poll of the global -+ // SafepointSynchronize::_state spuriously to return true. -+ // -+ // This is to avoid a race when we're in a native->Java transition -+ // racing the code which wakes up from a safepoint. 
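
The bang_stack_shadow_pages() loop above touches one word in every page of the shadow zone so that a stack overflow surfaces at a predictable point rather than somewhere inside a callee's frame setup. The generated sub/sd pair corresponds roughly to this C++ (illustrative only; the demo uses a local scratch buffer as a stand-in for the stack and ignores the native-call start_page special case):

    #include <cstddef>

    // Store to sp - page * page_size for each shadow page, one touch per page.
    static void bang_shadow_pages(volatile char* sp, int n_shadow_pages, int page_size) {
      for (int page = 1; page <= n_shadow_pages; page++) {
        *(sp - (size_t)page * page_size) = 0;
      }
    }

    int main() {
      char scratch[8 * 128] = {0};            // stand-in for the stack in this demo
      bang_shadow_pages(scratch + sizeof(scratch), 8, 128);
      return 0;
    }
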
-+ __ safepoint_poll(L, true /* at_return */, true /* acquire */, false /* in_nmethod */); ++ __ safepoint_poll_acquire(L); + __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); + __ beqz(t1, Continue); + __ bind(L); @@ -51159,7 +46296,7 @@ index 00000000000..6537b2dbd94 + { + Label no_reguard; + __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); -+ __ addi(t1, zr, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ addi(t1, zr, (u1)JavaThread::stack_guard_yellow_reserved_disabled); + __ bne(t0, t1, no_reguard); + + __ pusha(); // only save smashed registers @@ -51350,8 +46487,15 @@ index 00000000000..6537b2dbd94 + + // increment invocation count & check for overflow + Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; + if (inc_counter) { -+ generate_counter_incr(&invocation_counter_overflow); ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } + } + + Label continue_after_compile; @@ -51388,6 +46532,15 @@ index 00000000000..6537b2dbd94 + + // invocation counter overflow + if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ // don't think we need this ++ __ get_method(x11); ++ __ j(profile_method_continue); ++ } + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(continue_after_compile); @@ -51728,10 +46881,10 @@ index 00000000000..6537b2dbd94 +#endif // !PRODUCT diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp new file mode 100644 -index 00000000000..d2a301c6e74 +index 0000000000..84b1afc7dc --- /dev/null +++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -0,0 +1,3951 @@ +@@ -0,0 +1,4006 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
@@ -51762,7 +46915,6 @@ index 00000000000..d2a301c6e74 +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.hpp" -+#include "gc/shared/tlab_globals.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" @@ -51778,10 +46930,15 @@ index 00000000000..d2a301c6e74 +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" -+#include "utilities/powerOfTwo.hpp" + +#define __ _masm-> + ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No RISC-V specific initialization ++} ++ +// Address computation: local variables + +static inline Address iaddress(int n) { @@ -52139,7 +47296,6 @@ index 00000000000..d2a301c6e74 + int32_t offset = 0; + __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); + __ ld(tmp, Address(rarg, offset)); -+ __ resolve_oop_handle(tmp); + __ bne(result, tmp, notNull); + __ mv(result, zr); // NULL object reference + __ bind(notNull); @@ -53475,6 +48631,7 @@ index 00000000000..d2a301c6e74 + assert(UseLoopCounter || !UseOnStackReplacement, + "on-stack-replacement requires loop counters"); + Label backedge_counter_overflow; ++ Label profile_method; + Label dispatch; + if (UseLoopCounter) { + // increment backedge counter for backward branches @@ -53499,31 +48656,75 @@ index 00000000000..d2a301c6e74 + __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory + __ bind(has_counters); + -+ Label no_mdo; -+ int increment = InvocationCounter::count_increment; -+ if (ProfileInterpreter) { -+ // Are we profiling? -+ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ __ beqz(x11, no_mdo); -+ // Increment the MDO backedge counter -+ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); -+ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); -+ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, -+ x10, t0, false, ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ __ beqz(x11, no_mdo); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ x10, t0, false, ++ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); ++ __ j(dispatch); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(t0, Address(xmethod, Method::method_counters_offset())); ++ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); ++ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, ++ x10, t1, false, + UseOnStackReplacement ? 
&backedge_counter_overflow : &dispatch); -+ __ j(dispatch); ++ } else { // not TieredCompilation ++ // increment counter ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(x10, Address(t1, be_offset)); // load backedge counter ++ __ addw(t0, x10, InvocationCounter::count_increment); // increment counter ++ __ sw(t0, Address(t1, be_offset)); // store counter ++ ++ __ lwu(x10, Address(t1, inv_offset)); // load invocation counter ++ __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits ++ __ addw(x10, x10, t0); // add both counters ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); ++ __ blt(x10, t0, dispatch); ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(x10, profile_method); ++ ++ if (UseOnStackReplacement) { ++ // check for overflow against x11 which is the MDO taken count ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); ++ __ bltu(x11, t0, dispatch); // Intel == Assembler::below, lo:unsigned lower ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the MethodData*, which value does not get reset on ++ // the call to frequency_counter_overflow(). To avoid ++ // excessive calls to the overflow routine while the method is ++ // being compiled, add a second test to make sure the overflow ++ // function is called only once every overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(x11, x11, overflow_frequency - 1); ++ __ beqz(x11, backedge_counter_overflow); ++ ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against x10, which is the sum of the ++ // counters ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); ++ __ bgeu(x10, t0, backedge_counter_overflow); // Intel == Assembler::aboveEqual ++ } ++ } + } -+ __ bind(no_mdo); -+ // Increment backedge counter in MethodCounters* -+ __ ld(t0, Address(xmethod, Method::method_counters_offset())); -+ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); -+ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, -+ x10, t1, false, -+ UseOnStackReplacement ? 
&backedge_counter_overflow : &dispatch); + __ bind(dispatch); + } -+ + // Pre-load the next target bytecode into t0 + __ load_unsigned_byte(t0, Address(xbcp, 0)); + @@ -53532,52 +48733,63 @@ index 00000000000..d2a301c6e74 + // xbcp: target bcp + __ dispatch_only(vtos, /*generate_poll*/true); + -+ if (UseLoopCounter && UseOnStackReplacement) { -+ // invocation counter overflow -+ __ bind(backedge_counter_overflow); -+ __ neg(x12, x12); -+ __ add(x12, x12, xbcp); // branch xbcp -+ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::frequency_counter_overflow), -+ x12); -+ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode -+ -+ // x10: osr nmethod (osr ok) or NULL (osr not possible) -+ // w11: target bytecode -+ // x12: temporary -+ __ beqz(x10, dispatch); // test result -- no osr if null -+ // nmethod may have been invalidated (VM may block upon call_VM return) -+ __ lbu(x12, Address(x10, nmethod::state_offset())); -+ if (nmethod::in_use != 0) { -+ __ sub(x12, x12, nmethod::in_use); ++ if (UseLoopCounter) { ++ if (ProfileInterpreter && !TieredCompilation) { ++ // Out-of-line code to allocate method data oop. ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode ++ __ set_method_data_pointer_for_bcp(); ++ __ j(dispatch); + } -+ __ bnez(x12, dispatch); + -+ // We have the address of an on stack replacement routine in x10 -+ // We need to prepare to execute the OSR method. First we must -+ // migrate the locals and monitors off of the stack. ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ neg(x12, x12); ++ __ add(x12, x12, xbcp); // branch xbcp ++ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), ++ x12); ++ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode ++ ++ // x10: osr nmethod (osr ok) or NULL (osr not possible) ++ // w11: target bytecode ++ // x12: temporary ++ __ beqz(x10, dispatch); // test result -- no osr if null ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lbu(x12, Address(x10, nmethod::state_offset())); ++ if (nmethod::in_use != 0) { ++ __ sub(x12, x12, nmethod::in_use); ++ } ++ __ bnez(x12, dispatch); ++ ++ // We have the address of an on stack replacement routine in x10 ++ // We need to prepare to execute the OSR method. First we must ++ // migrate the locals and monitors off of the stack. 
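
In the non-tiered profiling path above, the backedge count kept in the MDO is never reset, so calls into frequency_counter_overflow() are throttled by masking the count with overflow_frequency - 1: only every 1024th overflowing backedge reaches the runtime. The masking in isolation, as a sketch with the constant taken from the comment above:

    #include <cstdint>
    #include <cstdio>

    // Power-of-two throttle: fire only when the low bits of the running count are
    // all zero, i.e. once every overflow_frequency backedges.
    static bool should_call_overflow(uint32_t mdo_taken_count) {
      const uint32_t overflow_frequency = 1024;
      return (mdo_taken_count & (overflow_frequency - 1)) == 0;
    }

    int main() {
      printf("%d %d %d\n", should_call_overflow(1024),
             should_call_overflow(1025), should_call_overflow(2048)); // 1 0 1
      return 0;
    }
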
+ -+ __ mv(x9, x10); // save the nmethod ++ __ mv(x9, x10); // save the nmethod + -+ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + -+ // x10 is OSR buffer, move it to expected parameter location -+ __ mv(j_rarg0, x10); ++ // x10 is OSR buffer, move it to expected parameter location ++ __ mv(j_rarg0, x10); + -+ // remove activation -+ // get sender esp -+ __ ld(esp, -+ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); -+ // remove frame anchor -+ __ leave(); -+ // Ensure compiled code always sees stack at proper alignment -+ __ andi(sp, esp, -16); ++ // remove activation ++ // get sender esp ++ __ ld(esp, ++ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ // remove frame anchor ++ __ leave(); ++ // Ensure compiled code always sees stack at proper alignment ++ __ andi(sp, esp, -16); + -+ // and begin the OSR nmethod -+ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); -+ __ jr(t0); ++ // and begin the OSR nmethod ++ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); ++ __ jr(t0); ++ } + } +} + @@ -53981,7 +49193,7 @@ index 00000000000..d2a301c6e74 + const Register temp = x9; + assert_different_registers(Rcache, index, temp); + -+ Label resolved, clinit_barrier_slow; ++ Label resolved; + + Bytecodes::Code code = bytecode(); + switch (code) { @@ -53995,10 +49207,6 @@ index 00000000000..d2a301c6e74 + __ mv(t0, (int) code); + __ beq(temp, t0, resolved); + -+ // resolve first time through -+ // Class initialization barrier slow path lands here as well. -+ __ bind(clinit_barrier_slow); -+ + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); + __ mv(temp, (int) code); + __ call_VM(noreg, entry, temp); @@ -54008,13 +49216,6 @@ index 00000000000..d2a301c6e74 + // n.b. unlike x86 Rcache is now rcpool plus the indexed offset + // so all clients ofthis method must be modified accordingly + __ bind(resolved); -+ -+ // Class initialization barrier for static methods -+ if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { -+ __ load_resolved_method_at_index(byte_no, temp, Rcache); -+ __ load_method_holder(temp, temp); -+ __ clinit_barrier(temp, t0, NULL, &clinit_barrier_slow); -+ } +} + +// The Rcache and index registers must be set before call @@ -54921,6 +50122,7 @@ index 00000000000..d2a301c6e74 + // since the parameter_size includes it. + __ push_reg(x9); + __ mv(x9, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); + __ load_resolved_reference_at_index(index, x9); + __ pop_reg(x9); + __ push_reg(index); // push appendix (MethodType, CallSite, etc.) @@ -55104,7 +50306,9 @@ index 00000000000..d2a301c6e74 + __ profile_virtual_call(x13, x30, x9); + + // Get declaring interface class from method, and itable index -+ __ load_method_holder(x10, xmethod); ++ __ ld(x10, Address(xmethod, Method::const_offset())); ++ __ ld(x10, Address(x10, ConstMethod::constants_offset())); ++ __ ld(x10, Address(x10, ConstantPool::pool_holder_offset_in_bytes())); + __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); + __ subw(xmethod, xmethod, Method::itable_index_max); + __ negw(xmethod, xmethod); @@ -55291,9 +50495,13 @@ index 00000000000..d2a301c6e74 + __ bnez(x13, loop); + } + -+ // initialize object hader only. ++ // initialize object header only. 
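
The header initialization below chooses the new object's mark word: with UseBiasedLocking it comes from the klass' prototype header (which may encode a biasable pattern), otherwise from the fixed unlocked prototype. A stripped-down C++ rendering, with types and values invented for illustration:

    #include <cstdint>
    #include <cstdio>

    struct DemoKlass { uintptr_t prototype_header; };  // stand-in for Klass

    static uintptr_t initial_mark(const DemoKlass* k, bool use_biased_locking,
                                  uintptr_t unlocked_prototype) {
      return use_biased_locking ? k->prototype_header : unlocked_prototype;
    }

    int main() {
      DemoKlass k = { 0x405 };   // hypothetical biasable bit pattern
      printf("%#lx\n", (unsigned long)initial_mark(&k, true, 0x1));   // 0x405
      printf("%#lx\n", (unsigned long)initial_mark(&k, false, 0x1));  // 0x1
      return 0;
    }
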
+ __ bind(initialize_header); -+ __ mv(t0, (intptr_t)markWord::prototype().value()); ++ if (UseBiasedLocking) { ++ __ ld(t0, Address(x14, Klass::prototype_header_offset())); ++ } else { ++ __ mv(t0, (intptr_t)markOopDesc::prototype()); ++ } + __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); + __ store_klass_gap(x10, zr); // zero klass gap for compressed oops + __ store_klass(x10, x14); // store klass last @@ -55302,7 +50510,7 @@ index 00000000000..d2a301c6e74 + SkipIfEqual skip(_masm, &DTraceAllocProbes, false); + // Trigger dtrace event for fastpath + __ push(atos); // save the return value -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), x10); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), x10); + __ pop(atos); // restore the return value + } + __ j(done); @@ -55685,7 +50893,7 @@ index 00000000000..d2a301c6e74 +} diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.hpp b/src/hotspot/cpu/riscv/templateTable_riscv.hpp new file mode 100644 -index 00000000000..fcc86108d28 +index 0000000000..fcc86108d2 --- /dev/null +++ b/src/hotspot/cpu/riscv/templateTable_riscv.hpp @@ -0,0 +1,42 @@ @@ -55731,14 +50939,14 @@ index 00000000000..fcc86108d28 +static void index_check(Register array, Register index); + +#endif // CPU_RISCV_TEMPLATETABLE_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp +diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp new file mode 100644 -index 00000000000..4f50adb05c3 +index 0000000000..6c89133de0 --- /dev/null -+++ b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp -@@ -0,0 +1,33 @@ ++++ b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp +@@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55762,22 +50970,31 @@ index 00000000000..4f50adb05c3 + * + */ + -+#include "precompiled.hpp" -+#include "prims/universalNativeInvoker.hpp" -+#include "utilities/debug.hpp" ++#ifndef CPU_RISCV_VMSTRUCTS_RISCV_HPP ++#define CPU_RISCV_VMSTRUCTS_RISCV_HPP + -+address ProgrammableInvoker::generate_adapter(jobject jabi, jobject jlayout) { -+ Unimplemented(); -+ return nullptr; -+} -diff --git a/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
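
VM_STRUCTS_CPU below follows the usual HotSpot X-macro pattern: this header only lists the platform-specific fields, and the shared vmStructs.cpp passes in the declarer macros that turn each entry into a Serviceability Agent table row. A toy version of the pattern, with made-up macro names:

    #include <cstdio>

    // The platform side: a list of fields, parameterized by a declarer macro.
    #define MY_STRUCTS_CPU(nonstatic_field) \
      nonstatic_field(JavaFrameAnchor, _last_Java_fp, "intptr_t*")

    // One possible declarer: print a row instead of building the real SA table.
    #define PRINT_FIELD(klass, field, type) \
      printf("%s::%s : %s\n", #klass, #field, type);

    int main() {
      MY_STRUCTS_CPU(PRINT_FIELD)
      return 0;
    }
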
++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // CPU_RISCV_VMSTRUCTS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp new file mode 100644 -index 00000000000..ce70da72f2e +index 0000000000..6bdce51506 --- /dev/null -+++ b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp -@@ -0,0 +1,42 @@ ++++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp +@@ -0,0 +1,87 @@ +/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55802,31 +51019,76 @@ index 00000000000..ce70da72f2e + */ + +#include "precompiled.hpp" -+#include "prims/universalUpcallHandler.hpp" -+#include "utilities/debug.hpp" ++#include "memory/allocation.hpp" ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_riscv.hpp" + -+address ProgrammableUpcallHandler::generate_upcall_stub(jobject jrec, jobject jabi, jobject jlayout) { -+ Unimplemented(); -+ return nullptr; ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); ++ _initialized = true; +} + -+address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) { -+ ShouldNotCallThis(); -+ return nullptr; ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; +} + -+bool ProgrammableUpcallHandler::supports_optimized_upcalls() { -+ return false; ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; +} -diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = 
NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp new file mode 100644 -index 00000000000..6c89133de02 +index 0000000000..711e4aeaf6 --- /dev/null -+++ b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp -@@ -0,0 +1,42 @@ ++++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp +@@ -0,0 +1,55 @@ +/* -+ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -55849,29 +51111,42 @@ index 00000000000..6c89133de02 + * + */ + -+#ifndef CPU_RISCV_VMSTRUCTS_RISCV_HPP -+#define CPU_RISCV_VMSTRUCTS_RISCV_HPP ++#ifndef CPU_RISCV_VM_VERSION_EXT_RISCV_HPP ++#define CPU_RISCV_VM_VERSION_EXT_RISCV_HPP + -+// These are the CPU-specific fields, types and integer -+// constants required by the Serviceability Agent. This file is -+// referenced by vmStructs.cpp. ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" + -+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ -+ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; + -+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; + -+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); + -+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); + -+#endif // CPU_RISCV_VMSTRUCTS_RISCV_HPP ++}; ++ ++#endif // CPU_RISCV_VM_VERSION_EXT_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp new file mode 100644 -index 00000000000..768c7633ca6 +index 0000000000..0e8f526bd9 --- /dev/null +++ 
b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -0,0 +1,230 @@ +@@ -0,0 +1,209 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. @@ -55907,11 +51182,19 @@ index 00000000000..768c7633ca6 +#include OS_HEADER_INLINE(os) + +const char* VM_Version::_uarch = ""; ++const char* VM_Version::_vm_mode = ""; +uint32_t VM_Version::_initial_vector_length = 0; + +void VM_Version::initialize() { + get_os_cpu_info(); + ++ // check if satp.mode is supported, currently supports up to SV48(RV64) ++ if (get_satp_mode() > VM_SV48) { ++ vm_exit_during_initialization( ++ err_msg("Unsupported satp mode: %s. Only satp modes up to sv48 are supported for now.", ++ _vm_mode)); ++ } ++ + if (FLAG_IS_DEFAULT(UseFMA)) { + FLAG_SET_DEFAULT(UseFMA, true); + } @@ -55956,11 +51239,6 @@ index 00000000000..768c7633ca6 + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + -+ if (UseSHA3Intrinsics) { -+ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); -+ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); -+ } -+ + if (UseCRC32Intrinsics) { + warning("CRC32 intrinsics are not available on this CPU."); + FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); @@ -55971,11 +51249,6 @@ index 00000000000..768c7633ca6 + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + -+ if (UseMD5Intrinsics) { -+ warning("MD5 intrinsics are not available on this CPU."); -+ FLAG_SET_DEFAULT(UseMD5Intrinsics, false); -+ } -+ + if (UseRVV) { + if (!(_features & CPU_V)) { + warning("RVV is not supported on this CPU"); @@ -55986,11 +51259,6 @@ index 00000000000..768c7633ca6 + } + } + -+ if (UseRVB && !(_features & CPU_B)) { -+ warning("RVB is not supported on this CPU"); -+ FLAG_SET_DEFAULT(UseRVB, false); -+ } -+ + if (UseRVC && !(_features & CPU_C)) { + warning("RVC is not supported on this CPU"); + FLAG_SET_DEFAULT(UseRVC, false); @@ -56000,7 +51268,7 @@ index 00000000000..768c7633ca6 + FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); + } + -+ if (UseRVB) { ++ if (UseZbb) { + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { + FLAG_SET_DEFAULT(UsePopCountInstruction, true); + } @@ -56021,6 +51289,10 @@ index 00000000000..768c7633ca6 +#ifdef COMPILER2 + c2_initialize(); +#endif // COMPILER2 ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++ ++ FLAG_SET_DEFAULT(UseMembar, true); +} + +#ifdef COMPILER2 @@ -56041,10 +51313,6 @@ index 00000000000..768c7633ca6 + FLAG_SET_DEFAULT(MaxVectorSize, 0); + } + -+ if (!UseRVV) { -+ FLAG_SET_DEFAULT(UseRVVForBigIntegerShiftIntrinsics, false); -+ } -+ + if (UseRVV) { + if (FLAG_IS_DEFAULT(MaxVectorSize)) { + MaxVectorSize = _initial_vector_length; @@ -56088,26 +51356,12 @@ index 00000000000..768c7633ca6 + } +} +#endif // COMPILER2 -+ -+void VM_Version::initialize_cpu_information(void) { -+ // do nothing if cpu info has been initialized -+ if (_initialized) { -+ return; -+ } -+ -+ _no_of_cores = os::processor_count(); -+ _no_of_threads = _no_of_cores; -+ _no_of_sockets = _no_of_cores; -+ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); -+ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); -+ _initialized = true; -+} diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp new file mode 100644 -index 00000000000..8e35530359a +index 0000000000..875511f522 --- /dev/null +++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp -@@ -0,0 +1,72 @@ +@@ -0,0 +1,80 @@ +/* + * Copyright 
(c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. @@ -56148,18 +51402,27 @@ index 00000000000..8e35530359a + static void c2_initialize(); +#endif // COMPILER2 + ++// VM modes (satp.mode) privileged ISA 1.10 ++enum VM_MODE { ++ VM_MBARE = 0, ++ VM_SV39 = 8, ++ VM_SV48 = 9, ++ VM_SV57 = 10, ++ VM_SV64 = 11 ++}; ++ +protected: + static const char* _uarch; ++ static const char* _vm_mode; + static uint32_t _initial_vector_length; + static void get_os_cpu_info(); + static uint32_t get_current_vector_length(); ++ static VM_MODE get_satp_mode(); + +public: + // Initialization + static void initialize(); + -+ constexpr static bool supports_stack_watermark_barrier() { return true; } -+ + enum Feature_Flag { +#define CPU_FEATURE_FLAGS(decl) \ + decl(I, "i", 8) \ @@ -56168,8 +51431,7 @@ index 00000000000..8e35530359a + decl(F, "f", 5) \ + decl(D, "d", 3) \ + decl(C, "c", 2) \ -+ decl(V, "v", 21) \ -+ decl(B, "b", 1) ++ decl(V, "v", 21) + +#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit), + CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) @@ -56182,10 +51444,10 @@ index 00000000000..8e35530359a +#endif // CPU_RISCV_VM_VERSION_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp new file mode 100644 -index 00000000000..aa7222dc64a +index 0000000000..c4338715f9 --- /dev/null +++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -@@ -0,0 +1,64 @@ +@@ -0,0 +1,51 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. @@ -56228,34 +51490,21 @@ index 00000000000..aa7222dc64a + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { + for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { -+ regName[i++] = reg->name(); ++ regName[i++] = freg->name(); + } + freg = freg->successor(); + } + -+ VectorRegister vreg = ::as_VectorRegister(0); -+ for ( ; i < ConcreteRegisterImpl::max_vpr ; ) { -+ for (int j = 0 ; j < VectorRegisterImpl::max_slots_per_register ; j++) { -+ regName[i++] = reg->name(); -+ } -+ vreg = vreg->successor(); -+ } -+ + for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) { -+ regName[i] = "NON-GPR-FPR-VPR"; ++ regName[i] = "NON-GPR-FPR"; + } +} -+ -+VMReg VMRegImpl::vmStorageToVMReg(int type, int index) { -+ Unimplemented(); -+ return VMRegImpl::Bad(); -+} diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp new file mode 100644 -index 00000000000..9e611b1f671 +index 0000000000..6f613a8f11 --- /dev/null +++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp -@@ -0,0 +1,68 @@ +@@ -0,0 +1,53 @@ +/* + * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
@@ -56292,10 +51541,6 @@ index 00000000000..9e611b1f671 + return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; +} + -+inline bool is_VectorRegister() { -+ return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr; -+} -+ +inline Register as_Register() { + assert(is_Register(), "must be"); + return ::as_Register(value() / RegisterImpl::max_slots_per_register); @@ -56307,26 +51552,15 @@ index 00000000000..9e611b1f671 + FloatRegisterImpl::max_slots_per_register); +} + -+inline VectorRegister as_VectorRegister() { -+ assert(is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be"); -+ return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / -+ VectorRegisterImpl::max_slots_per_register); -+} -+ +inline bool is_concrete() { + assert(is_reg(), "must be"); -+ if (is_VectorRegister()) { -+ int base = value() - ConcreteRegisterImpl::max_fpr; -+ return (base % VectorRegisterImpl::max_slots_per_register) == 0; -+ } else { -+ return is_even(value()); -+ } ++ return is_even(value()); +} + +#endif // CPU_RISCV_VMREG_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp new file mode 100644 -index 00000000000..06b70020b4b +index 0000000000..06b70020b4 --- /dev/null +++ b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp @@ -0,0 +1,46 @@ @@ -56378,7 +51612,7 @@ index 00000000000..06b70020b4b +#endif // CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp new file mode 100644 -index 00000000000..78b81138003 +index 0000000000..0d205240a5 --- /dev/null +++ b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp @@ -0,0 +1,260 @@ @@ -56555,7 +51789,7 @@ index 00000000000..78b81138003 + assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); + + // Entry arguments: -+ // t2: CompiledICHolder ++ // t1: CompiledICHolder + // j_rarg0: Receiver + + // This stub is called from compiled code which has no callee-saved registers, @@ -56642,59 +51876,11 @@ index 00000000000..78b81138003 + const unsigned int icache_line_size = wordSize; + return icache_line_size; +} -diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -index 897be2209e2..ee298f56653 100644 ---- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -@@ -1,6 +1,6 @@ - /* -- * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2016, 2019, SAP SE. All rights reserved. -+ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2016, 2019 SAP SE. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -1447,7 +1447,10 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op - } - - // result = condition ? 
opr1 : opr2 --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on s390"); -+ - Assembler::branch_condition acond = Assembler::bcondEqual, ncond = Assembler::bcondNotEqual; - switch (condition) { - case lir_cond_equal: acond = Assembler::bcondEqual; ncond = Assembler::bcondNotEqual; break; -diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -index cee3140f4f7..82e9de5a06f 100644 ---- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -1970,7 +1970,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - } - } - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on x86"); -+ - Assembler::Condition acond, ncond; - switch (condition) { - case lir_cond_equal: acond = Assembler::equal; ncond = Assembler::notEqual; break; diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp -index 3799adf5dd9..6f75e623a9a 100644 +index 2842a11f92..208a374eea 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp -@@ -2845,6 +2845,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) { +@@ -2829,6 +2829,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) { strncpy(cpuinfo, "IA64", length); #elif defined(PPC) strncpy(cpuinfo, "PPC64", length); @@ -56703,9 +51889,19 @@ index 3799adf5dd9..6f75e623a9a 100644 #elif defined(S390) strncpy(cpuinfo, "S390", length); #elif defined(SPARC) +@@ -4060,7 +4062,8 @@ size_t os::Linux::find_large_page_size() { + IA64_ONLY(256 * M) + PPC_ONLY(4 * M) + S390_ONLY(1 * M) +- SPARC_ONLY(4 * M); ++ SPARC_ONLY(4 * M) ++ RISCV64_ONLY(2 * M); + #endif // ZERO + + FILE *fp = fopen("/proc/meminfo", "r"); diff --git a/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp new file mode 100644 -index 00000000000..f2610af6cdd +index 0000000000..f2610af6cd --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp @@ -0,0 +1,26 @@ @@ -56737,10 +51933,10 @@ index 00000000000..f2610af6cdd +// nothing required here diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp new file mode 100644 -index 00000000000..761da5d743e +index 0000000000..4a1ebee8b0 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -@@ -0,0 +1,134 @@ +@@ -0,0 +1,189 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. @@ -56776,26 +51972,96 @@ index 00000000000..761da5d743e +// Note that memory_order_conservative requires a full barrier after atomic stores. +// See https://patchwork.kernel.org/patch/3575821/ + ++#if defined(__clang_major__) ++#define FULL_COMPILER_ATOMIC_SUPPORT ++#elif (__GNUC__ > 13) || ((__GNUC__ == 13) && (__GNUC_MINOR__ >= 2)) ++#define FULL_COMPILER_ATOMIC_SUPPORT ++#endif ++ ++#define FULL_MEM_BARRIER __sync_synchronize() ++#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); ++#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); ++ +template -+struct Atomic::PlatformAdd { -+ template -+ D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { ++struct Atomic::PlatformAdd ++ : Atomic::FetchAndAdd > ++{ ++ template ++ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++ // If we add add and fetch for sub word and are using older compiler ++ // it must be added here due to not using lib atomic. ++ STATIC_ASSERT(byte_size >= 4); ++#endif ++ + D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE); + FULL_MEM_BARRIER; + return res; + } + -+ template -+ D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const { -+ return add_and_fetch(dest, add_value, order) - add_value; ++ template ++ D fetch_and_add(I add_value, D volatile* dest, atomic_memory_order order) const { ++ return add_and_fetch(add_value, dest, order) - add_value; + } +}; + ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++template<> ++template ++inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, ++ T volatile* dest __attribute__((unused)), ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(1 == sizeof(T)); ++ ++ if (order != memory_order_relaxed) { ++ FULL_MEM_BARRIER; ++ } ++ ++ uint32_t volatile* aligned_dst = (uint32_t volatile*)(((uintptr_t)dest) & (~((uintptr_t)0x3))); ++ int shift = 8 * (((uintptr_t)dest) - ((uintptr_t)aligned_dst)); // 0, 8, 16, 24 ++ ++ uint64_t mask = 0xfful << shift; // 0x00000000..FF.. ++ uint64_t remask = ~mask; // 0xFFFFFFFF..00.. ++ ++ uint64_t w_cv = ((uint64_t)(unsigned char)compare_value) << shift; // widen to 64-bit 0x00000000..CC.. ++ uint64_t w_ev = ((uint64_t)(unsigned char)exchange_value) << shift; // widen to 64-bit 0x00000000..EE.. ++ ++ uint64_t old_value; ++ uint64_t rc_temp; ++ ++ __asm__ __volatile__ ( ++ "1: lr.w %0, %2 \n\t" ++ " and %1, %0, %5 \n\t" // ignore unrelated bytes and widen to 64-bit 0x00000000..XX.. ++ " bne %1, %3, 2f \n\t" // compare 64-bit w_cv ++ " and %1, %0, %6 \n\t" // remove old byte ++ " or %1, %1, %4 \n\t" // add new byte ++ " sc.w %1, %1, %2 \n\t" // store new word ++ " bnez %1, 1b \n\t" ++ "2: \n\t" ++ : /*%0*/"=&r" (old_value), /*%1*/"=&r" (rc_temp), /*%2*/"+A" (*aligned_dst) ++ : /*%3*/"r" (w_cv), /*%4*/"r" (w_ev), /*%5*/"r" (mask), /*%6*/"r" (remask) ++ : "memory" ); ++ ++ if (order != memory_order_relaxed) { ++ FULL_MEM_BARRIER; ++ } ++ ++ return (T)((old_value & mask) >> shift); ++} ++#endif ++ +template +template -+inline T Atomic::PlatformXchg::operator()(T volatile* dest, -+ T exchange_value, ++inline T Atomic::PlatformXchg::operator()(T exchange_value, ++ T volatile* dest, + atomic_memory_order order) const { ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++ // If we add xchg for sub word and are using older compiler ++ // it must be added here due to not using lib atomic. 
++ STATIC_ASSERT(byte_size >= 4); ++#endif ++ + STATIC_ASSERT(byte_size == sizeof(T)); + T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE); + FULL_MEM_BARRIER; @@ -56805,10 +52071,15 @@ index 00000000000..761da5d743e +// __attribute__((unused)) on dest is to get rid of spurious GCC warnings. +template +template -+inline T Atomic::PlatformCmpxchg::operator()(T volatile* dest __attribute__((unused)), ++inline T Atomic::PlatformCmpxchg::operator()(T exchange_value, ++ T volatile* dest __attribute__((unused)), + T compare_value, -+ T exchange_value, + atomic_memory_order order) const { ++ ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++ STATIC_ASSERT(byte_size >= 4); ++#endif ++ + STATIC_ASSERT(byte_size == sizeof(T)); + T value = compare_value; + if (order != memory_order_relaxed) { @@ -56826,9 +52097,9 @@ index 00000000000..761da5d743e + +template<> +template -+inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__((unused)), ++inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, ++ T volatile* dest __attribute__((unused)), + T compare_value, -+ T exchange_value, + atomic_memory_order order) const { + STATIC_ASSERT(4 == sizeof(T)); + if (order != memory_order_relaxed) { @@ -56853,31 +52124,11 @@ index 00000000000..761da5d743e + return rv; +} + -+template -+struct Atomic::PlatformOrderedLoad -+{ -+ template -+ T operator()(const volatile T* p) const { T data; __atomic_load(const_cast(p), &data, __ATOMIC_ACQUIRE); return data; } -+}; -+ -+template -+struct Atomic::PlatformOrderedStore -+{ -+ template -+ void operator()(volatile T* p, T v) const { __atomic_store(const_cast(p), &v, __ATOMIC_RELEASE); } -+}; -+ -+template -+struct Atomic::PlatformOrderedStore -+{ -+ template -+ void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); } -+}; -+ ++#undef FULL_COMPILER_ATOMIC_SUPPORT +#endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp new file mode 100644 -index 00000000000..28868c76406 +index 0000000000..28868c7640 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp @@ -0,0 +1,45 @@ @@ -56926,12 +52177,12 @@ index 00000000000..28868c76406 +} + +#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp +diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp new file mode 100644 -index 00000000000..147cfdf3c10 +index 0000000000..bdf36d6b4c --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp -@@ -0,0 +1,31 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp +@@ -0,0 +1,124 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
@@ -56957,63 +52208,108 @@ index 00000000000..147cfdf3c10 + * + */ + -+#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP ++#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP ++#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP + -+// Empty for build system ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} + -+#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp -new file mode 100644 -index 00000000000..1aa58f27871 ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp -@@ -0,0 +1,42 @@ -+/* -+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; // fall through ++ case 7: to[6] = from[6]; // fall through ++ case 6: to[5] = from[5]; // fall through ++ case 5: to[4] = from[4]; // fall through ++ case 4: to[3] = from[3]; // fall through ++ case 3: to[2] = from[2]; // fall through ++ case 2: to[1] = from[1]; // fall through ++ case 1: to[0] = from[0]; // fall through ++ case 0: break; ++ default: ++ memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} + -+#ifndef OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} + -+#include ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} + -+// -+// Support for building on older Linux systems -+// ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} + -+#ifndef SYS_memfd_create -+#define SYS_memfd_create 279 -+#endif -+#ifndef SYS_fallocate -+#define SYS_fallocate 47 -+#endif ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ _Copy_conjoint_jshorts_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ _Copy_conjoint_jints_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ _Copy_conjoint_jlongs_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); ++ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_bytes(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jshorts(from, to, count); ++} + -+#endif // OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jints(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} ++ ++#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP diff --git 
a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp new file mode 100644 -index 00000000000..297414bfcd5 +index 0000000000..297414bfcd --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp @@ -0,0 +1,43 @@ @@ -57062,10 +52358,10 @@ index 00000000000..297414bfcd5 +#endif // OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp new file mode 100644 -index 00000000000..1c33dc1e87f +index 0000000000..5b5d35553f --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -@@ -0,0 +1,63 @@ +@@ -0,0 +1,74 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. @@ -57105,10 +52401,6 @@ index 00000000000..1c33dc1e87f +inline void OrderAccess::loadstore() { acquire(); } +inline void OrderAccess::storeload() { fence(); } + -+#define FULL_MEM_BARRIER __sync_synchronize() -+#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); -+#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); -+ +inline void OrderAccess::acquire() { + READ_MEM_BARRIER; +} @@ -57121,20 +52413,35 @@ index 00000000000..1c33dc1e87f + FULL_MEM_BARRIER; +} + -+inline void OrderAccess::cross_modify_fence_impl() { -+ asm volatile("fence.i" : : : "memory"); -+ if (UseConservativeFence) { -+ asm volatile("fence ir, ir" : : : "memory"); -+ } -+} ++ ++template ++struct OrderAccess::PlatformOrderedLoad ++{ ++ template ++ T operator()(const volatile T* p) const { T data; __atomic_load(const_cast(p), &data, __ATOMIC_ACQUIRE); return data; } ++}; ++ ++template ++struct OrderAccess::PlatformOrderedStore ++{ ++ template ++ void operator()(T v, volatile T* p) const { __atomic_store(const_cast(p), &v, __ATOMIC_RELEASE); } ++}; ++ ++template ++struct OrderAccess::PlatformOrderedStore ++{ ++ template ++ void operator()(T v, volatile T* p) const { release_store(p, v); OrderAccess::fence(); } ++}; + +#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp new file mode 100644 -index 00000000000..1f46bbab0a2 +index 0000000000..8b772892b4 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -@@ -0,0 +1,466 @@ +@@ -0,0 +1,624 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
@@ -57174,6 +52481,7 @@ index 00000000000..1f46bbab0a2 +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/java.hpp" @@ -57185,7 +52493,6 @@ index 00000000000..1f46bbab0a2 +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/timer.hpp" -+#include "signals_posix.hpp" +#include "utilities/debug.hpp" +#include "utilities/events.hpp" +#include "utilities/vmError.hpp" @@ -57223,11 +52530,11 @@ index 00000000000..1f46bbab0a2 + return (char*) -1; +} + -+address os::Posix::ucontext_get_pc(const ucontext_t * uc) { ++address os::Linux::ucontext_get_pc(const ucontext_t * uc) { + return (address)uc->uc_mcontext.__gregs[REG_PC]; +} + -+void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) { ++void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { + uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc; +} + @@ -57239,13 +52546,29 @@ index 00000000000..1f46bbab0a2 + return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; +} + -+address os::fetch_frame_from_context(const void* ucVoid, -+ intptr_t** ret_sp, intptr_t** ret_fp) { -+ address epc; ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). ++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; + const ucontext_t* uc = (const ucontext_t*)ucVoid; + + if (uc != NULL) { -+ epc = os::Posix::ucontext_get_pc(uc); ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); + if (ret_sp != NULL) { + *ret_sp = os::Linux::ucontext_get_sp(uc); + } @@ -57253,7 +52576,8 @@ index 00000000000..1f46bbab0a2 + *ret_fp = os::Linux::ucontext_get_fp(uc); + } + } else { -+ epc = NULL; ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); + if (ret_sp != NULL) { + *ret_sp = (intptr_t *)NULL; + } @@ -57265,23 +52589,51 @@ index 00000000000..1f46bbab0a2 + return epc; +} + -+frame os::fetch_compiled_frame_from_context(const void* ucVoid) { -+ const ucontext_t* uc = (const ucontext_t*)ucVoid; -+ // In compiled code, the stack banging is performed before RA -+ // has been saved in the frame. RA is live, and SP and FP -+ // belong to the caller. 
-+ intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); -+ intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); -+ address frame_pc = (address)(uc->uc_mcontext.__gregs[REG_LR] -+ - NativeInstruction::instruction_size); -+ return frame(frame_sp, frame_fp, frame_pc); -+} -+ +frame os::fetch_frame_from_context(const void* ucVoid) { + intptr_t* frame_sp = NULL; + intptr_t* frame_fp = NULL; -+ address epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); -+ return frame(frame_sp, frame_fp, epc); ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); ++ return frame(frame_sp, frame_fp, epc.pc()); ++} ++ ++bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { ++ address pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (Interpreter::contains(pc)) { ++ // interpreter performs stack banging after the fixed frame header has ++ // been generated while the compilers perform it before. To maintain ++ // semantic consistency between interpreted and compiled frames, the ++ // method returns the Java sender of the current frame. ++ *fr = os::fetch_frame_from_context(uc); ++ if (!fr->is_first_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } else { ++ // more complex code with compiled code ++ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); ++ CodeBlob* cb = CodeCache::find_blob(pc); ++ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { ++ // Not sure where the pc points to, fallback to default ++ // stack overflow handling ++ return false; ++ } else { ++ // In compiled code, the stack banging is performed before RA ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.__gregs[REG_LR] ++ - NativeInstruction::instruction_size); ++ *fr = frame(sp, fp, pc); ++ if (!fr->is_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ assert(!fr->is_first_frame(), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } ++ } ++ assert(fr->is_java_frame(), "Safety check"); ++ return true; +} + +// By default, gcc always saves frame pointer rfp on this stack. This @@ -57309,31 +52661,138 @@ index 00000000000..1f46bbab0a2 +} + +// Utility functions -+bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, -+ ucontext_t* uc, JavaThread* thread) { ++extern "C" JNIEXPORT int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = Thread::current_or_null_safe(); ++ ++ // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away ++ // (no destructors can be run) ++ os::ThreadCrashProtection::check_crash_protection(sig, t); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. 
++ ++ if (sig == SIGPIPE || sig == SIGXFSZ) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219 ++ return true; ++ } ++ } ++ ++#ifdef CAN_SHOW_REGISTERS_ON_ASSERT ++ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { ++ if (handle_assert_poison_fault(ucVoid, info->si_addr)) { ++ return 1; ++ } ++ } ++#endif ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++ thread = (JavaThread *) t; ++ } ++ else if(t->is_VM_thread()){ ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // Handle SafeFetch faults ++ if ((sig == SIGSEGV || sig == SIGBUS) && uc != NULL) { ++ address const pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { ++ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); ++ return 1; ++ } ++ } + + // decide if this trap can be handled by a stub + address stub = NULL; + -+ address pc = NULL; ++ address pc = NULL; + + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { -+ pc = (address) os::Posix::ucontext_get_pc(uc); -+ -+ address addr = (address) info->si_addr; -+ -+ // Make sure the high order byte is sign extended, as it may be masked away by the hardware. -+ if ((uintptr_t(addr) & (uintptr_t(1) << 55)) != 0) { -+ addr = address(uintptr_t(addr) | (uintptr_t(0xFF) << 56)); -+ } ++ pc = (address) os::Linux::ucontext_get_pc(uc); + + // Handle ALL stack overflow variations here + if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++ + // check if fault address is within thread stack -+ if (thread->is_in_full_stack(addr)) { -+ if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) { -+ return true; // continue ++ if (thread->on_local_stack(addr)) { ++ // stack overflow ++ if (thread->in_stack_yellow_reserved_zone(addr)) { ++ if (thread->thread_state() == _thread_in_Java) { ++ if (thread->in_stack_reserved_zone(addr)) { ++ frame fr; ++ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { ++ assert(fr.is_java_frame(), "Must be a Java frame"); ++ frame activation = ++ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); ++ if (activation.sp() != NULL) { ++ thread->disable_stack_reserved_zone(); ++ if (activation.is_interpreted_frame()) { ++ thread->set_reserved_stack_activation((address)( ++ activation.fp() + frame::interpreter_frame_initial_sp_offset)); ++ } else { ++ thread->set_reserved_stack_activation((address)activation.unextended_sp()); ++ } ++ return 1; ++ } ++ } ++ } ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++ thread->disable_stack_yellow_reserved_zone(); ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. ++ thread->disable_stack_yellow_reserved_zone(); ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. 
++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } + } + } + } @@ -57349,7 +52808,7 @@ index 00000000000..1f46bbab0a2 + tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); + } + stub = SharedRuntime::get_handle_wrong_method_stub(); -+ } else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { + stub = SharedRuntime::get_poll_stub(pc); + } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { + // BugId 4454115: A read from a MappedByteBuffer can fault @@ -57357,34 +52816,12 @@ index 00000000000..1f46bbab0a2 + // Do not crash the VM in such a case. + CodeBlob* cb = CodeCache::find_blob_unsafe(pc); + CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; -+ bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); -+ if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { ++ if (nm != NULL && nm->has_unsafe_access()) { + address next_pc = pc + NativeCall::instruction_size; -+ if (is_unsafe_arraycopy) { -+ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); -+ } + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } -+ } else if (sig == SIGILL && nativeInstruction_at(pc)->is_stop()) { -+ // Pull a pointer to the error message out of the instruction -+ // stream. -+ const uint64_t *detail_msg_ptr -+ = (uint64_t*)(pc + NativeInstruction::instruction_size); -+ const char *detail_msg = (const char *)*detail_msg_ptr; -+ const char *msg = "stop"; -+ if (TraceTraps) { -+ tty->print_cr("trap: %s: (SIGILL)", msg); -+ } -+ -+ // End life with a fatal error, message and detail message and the context. -+ // Note: no need to do any post-processing here (e.g. 
signal chaining) -+ va_list va_dummy; -+ VMError::report_and_die(thread, uc, NULL, 0, msg, detail_msg, va_dummy); -+ va_end(va_dummy); -+ -+ ShouldNotReachHere(); + } else if (sig == SIGFPE && -+ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { ++ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { + stub = + SharedRuntime:: + continuation_for_implicit_exception(thread, @@ -57392,42 +52829,70 @@ index 00000000000..1f46bbab0a2 + SharedRuntime:: + IMPLICIT_DIVIDE_BY_ZERO); + } else if (sig == SIGSEGV && -+ MacroAssembler::uses_implicit_null_check((void*)addr)) { ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { + // Determination of interpreter/vtable stub/compiled code null exception + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); + } -+ } else if ((thread->thread_state() == _thread_in_vm || -+ thread->thread_state() == _thread_in_native) && -+ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ -+ thread->doing_unsafe_access()) { ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { + address next_pc = pc + NativeCall::instruction_size; -+ if (UnsafeCopyMemory::contains_pc(pc)) { -+ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); -+ } + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } + + // jni_fast_GetField can trap at certain pc's if a GC kicks in + // and the heap gets shrunk before the field access. + if ((sig == SIGSEGV) || (sig == SIGBUS)) { -+ address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); -+ if (addr_slow != (address)-1) { -+ stub = addr_slow; ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; + } + } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. ++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++ // Block current thread until the memory serialize page permission restored. ++ os::block_on_serialize_page_trap(); ++ return true; ++ } + } + + if (stub != NULL) { + // save all thread context in case we need to restore it -+ if (thread != NULL) { -+ thread->set_saved_exception_pc(pc); -+ } ++ if (thread != NULL) thread->set_saved_exception_pc(pc); + -+ os::Posix::ucontext_set_pc(uc, stub); ++ os::Linux::ucontext_set_pc(uc, stub); + return true; + } + -+ return false; // Mute compiler ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++ ++ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler +} + +void os::Linux::init_thread_fpu_state(void) { @@ -57490,7 +52955,7 @@ index 00000000000..1f46bbab0a2 + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. 
-+ address pc = os::Posix::ucontext_get_pc(uc); ++ address pc = os::Linux::ucontext_get_pc(uc); + print_instructions(st, pc, sizeof(char)); + st->cr(); +} @@ -57603,10 +53068,10 @@ index 00000000000..1f46bbab0a2 +}; diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp new file mode 100644 -index 00000000000..6d415630661 +index 0000000000..f3e3a73bc5 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp -@@ -0,0 +1,59 @@ +@@ -0,0 +1,40 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. @@ -57646,29 +53111,10 @@ index 00000000000..6d415630661 + *(jlong *) dst = *(const jlong *) src; + } + -+ // SYSCALL_RISCV_FLUSH_ICACHE is used to flush instruction cache. The "fence.i" instruction -+ // only work on the current hart, so kernel provides the icache flush syscall to flush icache -+ // on each hart. You can pass a flag to determine a global or local icache flush. -+ static void icache_flush(long int start, long int end) -+ { -+ const int SYSCALL_RISCV_FLUSH_ICACHE = 259; -+ register long int __a7 asm ("a7") = SYSCALL_RISCV_FLUSH_ICACHE; -+ register long int __a0 asm ("a0") = start; -+ register long int __a1 asm ("a1") = end; -+ // the flush can be applied to either all threads or only the current. -+ // 0 means a global icache flush, and the icache flush will be applied -+ // to other harts concurrently executing. -+ register long int __a2 asm ("a2") = 0; -+ __asm__ volatile ("ecall\n\t" -+ : "+r" (__a0) -+ : "r" (__a0), "r" (__a1), "r" (__a2), "r" (__a7) -+ : "memory"); -+ } -+ +#endif // OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp new file mode 100644 -index 00000000000..a6432c84ec7 +index 0000000000..2bd48e09c3 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp @@ -0,0 +1,38 @@ @@ -57703,19 +53149,147 @@ index 00000000000..a6432c84ec7 +#include "runtime/prefetch.hpp" + + -+inline void Prefetch::read (const void *loc, intx interval) { ++inline void Prefetch::read (void *loc, intx interval) { +} + +inline void Prefetch::write(void *loc, intx interval) { +} + +#endif // OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.cpp b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.cpp +new file mode 100644 +index 0000000000..ffcd819487 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.cpp +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2023, Rivos Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "logging/log.hpp" ++#include "riscv_flush_icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" ++#include "utilities/debug.hpp" ++ ++#include ++#include ++ ++#define check_with_errno(check_type, cond, msg) \ ++ do { \ ++ int err = errno; \ ++ check_type(cond, "%s; error='%s' (errno=%s)", msg, os::strerror(err), \ ++ os::errno_name(err)); \ ++} while (false) ++ ++#define assert_with_errno(cond, msg) check_with_errno(assert, cond, msg) ++#define guarantee_with_errno(cond, msg) check_with_errno(guarantee, cond, msg) ++ ++#ifndef NR_riscv_flush_icache ++#ifndef NR_arch_specific_syscall ++#define NR_arch_specific_syscall 244 ++#endif ++#define NR_riscv_flush_icache (NR_arch_specific_syscall + 15) ++#endif ++ ++#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL ++#define SYS_RISCV_FLUSH_ICACHE_ALL 0UL ++ ++static long sys_flush_icache(uintptr_t start, uintptr_t end , uintptr_t flags) { ++ return syscall(NR_riscv_flush_icache, start, end, flags); ++} ++ ++bool RiscvFlushIcache::test() { ++ ATTRIBUTE_ALIGNED(64) char memory[64]; ++ long ret = sys_flush_icache((uintptr_t)&memory[0], ++ (uintptr_t)&memory[sizeof(memory) - 1], ++ SYS_RISCV_FLUSH_ICACHE_ALL); ++ if (ret == 0) { ++ return true; ++ } ++ int err = errno; \ ++ log_error(os)("Syscall: RISCV_FLUSH_ICACHE not available; error='%s' (errno=%s)", ++ os::strerror(err), os::errno_name(err)); ++ return false; ++} ++ ++void RiscvFlushIcache::flush(uintptr_t start, uintptr_t end) { ++ long ret = sys_flush_icache(start, end, SYS_RISCV_FLUSH_ICACHE_ALL); ++ guarantee_with_errno(ret == 0, "riscv_flush_icache failed"); ++} +diff --git a/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.hpp b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.hpp +new file mode 100644 +index 0000000000..f4e7263b39 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.hpp +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2023, Rivos Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_LINUX_RISCV_FLUSH_ICACHE_LINUX_HPP ++#define OS_LINUX_RISCV_FLUSH_ICACHE_LINUX_HPP ++ ++#include "memory/allocation.hpp" ++#include "runtime/vm_version.hpp" ++#include "utilities/growableArray.hpp" ++ ++class RiscvFlushIcache: public AllStatic { ++ public: ++ static bool test(); ++ static void flush(uintptr_t start, uintptr_t end); ++}; ++ ++#endif // OS_LINUX_RISCV_FLUSH_ICACHE_LINUX_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp new file mode 100644 -index 00000000000..3100572e9fd +index 0000000000..ccceed643e --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -@@ -0,0 +1,92 @@ +@@ -0,0 +1,100 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. @@ -57742,6 +53316,7 @@ index 00000000000..3100572e9fd + */ + +#include "precompiled.hpp" ++#include "memory/metaspaceShared.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/thread.inline.hpp" + @@ -57779,16 +53354,23 @@ index 00000000000..3100572e9fd + + intptr_t* ret_fp = NULL; + intptr_t* ret_sp = NULL; -+ address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp); -+ if (addr == NULL || ret_sp == NULL ) { ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { + // ucontext wasn't useful + return false; + } + -+ frame ret_frame(ret_sp, ret_fp, addr); -+ if (!ret_frame.safe_for_sender(this)) { ++ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { ++ // In the middle of a trampoline call. Bail out for safety. ++ // This happens rarely so shouldn't affect profiling. ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(this)) { +#ifdef COMPILER2 -+ frame ret_frame2(ret_sp, NULL, addr); ++ frame ret_frame2(ret_sp, NULL, addr.pc()); + if (!ret_frame2.safe_for_sender(this)) { + // nothing else to try if the frame isn't good + return false; @@ -57810,10 +53392,10 @@ index 00000000000..3100572e9fd +void JavaThread::cache_global_variables() { } diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp new file mode 100644 -index 00000000000..61e2cf85b63 +index 0000000000..4b91fa855a --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -@@ -0,0 +1,48 @@ +@@ -0,0 +1,67 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. @@ -57850,21 +53432,40 @@ index 00000000000..61e2cf85b63 + frame pd_last_frame(); + + public: ++ // Mutators are highly dangerous.... 
++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ + static ByteSize last_Java_fp_offset() { + return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); + } + ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ + bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, + bool isInJava); + + bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); +private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} + +#endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp new file mode 100644 -index 00000000000..6cf7683a586 +index 0000000000..6cf7683a58 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp @@ -0,0 +1,55 @@ @@ -57925,10 +53526,10 @@ index 00000000000..6cf7683a586 +#endif // OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp new file mode 100644 -index 00000000000..4623dbfad42 +index 0000000000..8bcc949fed --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp -@@ -0,0 +1,118 @@ +@@ -0,0 +1,137 @@ +/* + * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. 
@@ -57991,10 +53592,6 @@ index 00000000000..4623dbfad42 +#define HWCAP_ISA_V (1 << ('V' - 'A')) +#endif + -+#ifndef HWCAP_ISA_B -+#define HWCAP_ISA_B (1 << ('B' - 'A')) -+#endif -+ +#define read_csr(csr) \ +({ \ + register unsigned long __v; \ @@ -58010,18 +53607,35 @@ index 00000000000..4623dbfad42 + return (uint32_t)read_csr(CSR_VLENB); +} + ++VM_Version::VM_MODE VM_Version::get_satp_mode() { ++ if (!strcmp(_vm_mode, "sv39")) { ++ return VM_SV39; ++ } else if (!strcmp(_vm_mode, "sv48")) { ++ return VM_SV48; ++ } else if (!strcmp(_vm_mode, "sv57")) { ++ return VM_SV57; ++ } else if (!strcmp(_vm_mode, "sv64")) { ++ return VM_SV64; ++ } else { ++ return VM_MBARE; ++ } ++} ++ +void VM_Version::get_os_cpu_info() { + + uint64_t auxv = getauxval(AT_HWCAP); + -+ static_assert(CPU_I == HWCAP_ISA_I, "Flag CPU_I must follow Linux HWCAP"); -+ static_assert(CPU_M == HWCAP_ISA_M, "Flag CPU_M must follow Linux HWCAP"); -+ static_assert(CPU_A == HWCAP_ISA_A, "Flag CPU_A must follow Linux HWCAP"); -+ static_assert(CPU_F == HWCAP_ISA_F, "Flag CPU_F must follow Linux HWCAP"); -+ static_assert(CPU_D == HWCAP_ISA_D, "Flag CPU_D must follow Linux HWCAP"); -+ static_assert(CPU_C == HWCAP_ISA_C, "Flag CPU_C must follow Linux HWCAP"); -+ static_assert(CPU_V == HWCAP_ISA_V, "Flag CPU_V must follow Linux HWCAP"); -+ static_assert(CPU_B == HWCAP_ISA_B, "Flag CPU_B must follow Linux HWCAP"); ++ STATIC_ASSERT(CPU_I == HWCAP_ISA_I); ++ STATIC_ASSERT(CPU_M == HWCAP_ISA_M); ++ STATIC_ASSERT(CPU_A == HWCAP_ISA_A); ++ STATIC_ASSERT(CPU_F == HWCAP_ISA_F); ++ STATIC_ASSERT(CPU_D == HWCAP_ISA_D); ++ STATIC_ASSERT(CPU_C == HWCAP_ISA_C); ++ STATIC_ASSERT(CPU_V == HWCAP_ISA_V); ++ ++ // RISC-V has four bit-manipulation ISA-extensions: Zba/Zbb/Zbc/Zbs. ++ // Availability for those extensions could not be queried from HWCAP. ++ // TODO: Add proper detection for those extensions. + _features = auxv & ( + HWCAP_ISA_I | + HWCAP_ISA_M | @@ -58029,14 +53643,20 @@ index 00000000000..4623dbfad42 + HWCAP_ISA_F | + HWCAP_ISA_D | + HWCAP_ISA_C | -+ HWCAP_ISA_V | -+ HWCAP_ISA_B); ++ HWCAP_ISA_V); + + if (FILE *f = fopen("/proc/cpuinfo", "r")) { + char buf[512], *p; + while (fgets(buf, sizeof (buf), f) != NULL) { + if ((p = strchr(buf, ':')) != NULL) { -+ if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { ++ if (strncmp(buf, "mmu", sizeof "mmu" - 1) == 0) { ++ if (_vm_mode[0] != '\0') { ++ continue; ++ } ++ char* vm_mode = os::strdup(p + 2); ++ vm_mode[strcspn(vm_mode, "\n")] = '\0'; ++ _vm_mode = vm_mode; ++ } else if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { + char* uarch = os::strdup(p + 2); + uarch[strcspn(uarch, "\n")] = '\0'; + _uarch = uarch; @@ -58048,7 +53668,7 @@ index 00000000000..4623dbfad42 + } +} diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp -index e30d39f73d1..733ee9e654c 100644 +index e30d39f73d..c640c546b1 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -1,5 +1,5 @@ @@ -58058,48 +53678,59 @@ index e30d39f73d1..733ee9e654c 100644 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -199,7 +199,6 @@ bool LIR_OprDesc::is_oop() const { - void LIR_Op2::verify() const { +@@ -200,6 +200,9 @@ void LIR_Op2::verify() const { #ifdef ASSERT switch (code()) { -- case lir_cmove: + case lir_cmove: ++#ifdef RISCV ++ assert(false, "lir_cmove is LIR_Op4 on RISCV"); ++#endif case lir_xchg: break; -@@ -252,9 +251,7 @@ void LIR_Op2::verify() const { +@@ -252,9 +255,13 @@ void LIR_Op2::verify() const { LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) -- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) ++#ifdef RISCV + : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) ++#else + : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + , _cond(cond) + , _type(type) ++#endif , _label(block->label()) , _block(block) , _ublock(NULL) -@@ -262,9 +259,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block +@@ -262,9 +269,13 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block } LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : -- LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) ++#ifdef RISCV + LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) ++#else + LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + , _cond(cond) + , _type(type) ++#endif , _label(stub->entry()) , _block(NULL) , _ublock(NULL) -@@ -272,9 +267,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : +@@ -272,9 +283,13 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : } LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock) -- : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) ++#ifdef RISCV + : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) ++#else + : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + , _cond(cond) + , _type(type) ++#endif , _label(block->label()) , _block(block) , _ublock(ublock) -@@ -296,13 +289,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) { +@@ -296,13 +311,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) { } void LIR_OpBranch::negate_cond() { @@ -58120,61 +53751,66 @@ index e30d39f73d1..733ee9e654c 100644 default: ShouldNotReachHere(); } } -@@ -525,6 +518,13 @@ void LIR_OpVisitState::visit(LIR_Op* op) { +@@ -525,6 +540,15 @@ void LIR_OpVisitState::visit(LIR_Op* op) { assert(op->as_OpBranch() != NULL, "must be"); LIR_OpBranch* opBranch = (LIR_OpBranch*)op; ++#ifdef RISCV + assert(opBranch->_tmp1->is_illegal() && opBranch->_tmp2->is_illegal() && + opBranch->_tmp3->is_illegal() && opBranch->_tmp4->is_illegal() && + opBranch->_tmp5->is_illegal(), "not used"); + + if (opBranch->_opr1->is_valid()) do_input(opBranch->_opr1); + if (opBranch->_opr2->is_valid()) do_input(opBranch->_opr2); ++#endif + if (opBranch->_info != NULL) do_info(opBranch->_info); assert(opBranch->_result->is_illegal(), "not used"); if (opBranch->_stub != NULL) opBranch->stub()->visit(this); -@@ -615,17 +615,19 @@ void LIR_OpVisitState::visit(LIR_Op* op) { +@@ -615,6 +639,21 @@ void LIR_OpVisitState::visit(LIR_Op* op) { // to the result 
operand, otherwise the backend fails case lir_cmove: { -- assert(op->as_Op2() != NULL, "must be"); -- LIR_Op2* op2 = (LIR_Op2*)op; ++#ifdef RISCV + assert(op->as_Op4() != NULL, "must be"); + LIR_Op4* op4 = (LIR_Op4*)op; - -- assert(op2->_info == NULL && op2->_tmp1->is_illegal() && op2->_tmp2->is_illegal() && -- op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used"); -- assert(op2->_opr1->is_valid() && op2->_opr2->is_valid() && op2->_result->is_valid(), "used"); ++ + assert(op4->_info == NULL && op4->_tmp1->is_illegal() && op4->_tmp2->is_illegal() && + op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "not used"); + assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && op4->_result->is_valid(), "used"); - -- do_input(op2->_opr1); -- do_input(op2->_opr2); -- do_temp(op2->_opr2); -- do_output(op2->_result); ++ + do_input(op4->_opr1); + do_input(op4->_opr2); + if (op4->_opr3->is_valid()) do_input(op4->_opr3); + if (op4->_opr4->is_valid()) do_input(op4->_opr4); + do_temp(op4->_opr2); + do_output(op4->_result); ++#else + assert(op->as_Op2() != NULL, "must be"); + LIR_Op2* op2 = (LIR_Op2*)op; + +@@ -626,6 +665,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + do_input(op2->_opr2); + do_temp(op2->_opr2); + do_output(op2->_result); ++#endif break; } -@@ -1048,6 +1050,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { +@@ -1048,6 +1088,12 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { masm->emit_op3(this); } ++#ifdef RISCV +void LIR_Op4::emit_code(LIR_Assembler* masm) { + masm->emit_op4(this); +} ++#endif + void LIR_OpLock::emit_code(LIR_Assembler* masm) { masm->emit_lock(this); if (stub()) { -@@ -1084,6 +1090,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) +@@ -1084,6 +1130,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) , _file(NULL) , _line(0) #endif @@ -58185,7 +53821,7 @@ index e30d39f73d1..733ee9e654c 100644 { } -@@ -1101,6 +1111,38 @@ void LIR_List::set_file_and_line(const char * file, int line) { +@@ -1101,6 +1151,38 @@ void LIR_List::set_file_and_line(const char * file, int line) { } #endif @@ -58224,45 +53860,34 @@ index e30d39f73d1..733ee9e654c 100644 void LIR_List::append(LIR_InsertionBuffer* buffer) { assert(this == buffer->lir_list(), "wrong lir list"); -@@ -1680,7 +1722,6 @@ const char * LIR_Op::name() const { - case lir_cmp_l2i: s = "cmp_l2i"; break; - case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; - case lir_cmp_fd2i: s = "comp_fd2i"; break; -- case lir_cmove: s = "cmove"; break; - case lir_add: s = "add"; break; - case lir_sub: s = "sub"; break; - case lir_mul: s = "mul"; break; -@@ -1705,6 +1746,8 @@ const char * LIR_Op::name() const { - case lir_irem: s = "irem"; break; - case lir_fmad: s = "fmad"; break; - case lir_fmaf: s = "fmaf"; break; -+ // LIR_Op4 -+ case lir_cmove: s = "cmove"; break; - // LIR_OpJavaCall - case lir_static_call: s = "static"; break; - case lir_optvirtual_call: s = "optvirtual"; break; -@@ -1841,6 +1884,8 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { +@@ -1841,6 +1923,10 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { // LIR_OpBranch void LIR_OpBranch::print_instr(outputStream* out) const { print_condition(out, cond()); out->print(" "); ++#ifdef RISCV + in_opr1()->print(out); out->print(" "); + in_opr2()->print(out); out->print(" "); ++#endif if (block() != NULL) { out->print("[B%d] ", block()->block_id()); } else if (stub() != NULL) { -@@ -1927,7 +1972,7 @@ void 
LIR_OpRoundFP::print_instr(outputStream* out) const { +@@ -1927,7 +2013,11 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { // LIR_Op2 void LIR_Op2::print_instr(outputStream* out) const { -- if (code() == lir_cmove || code() == lir_cmp) { ++#ifdef RISCV + if (code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch) { ++#else + if (code() == lir_cmove || code() == lir_cmp) { ++#endif print_condition(out, condition()); out->print(" "); } in_opr1()->print(out); out->print(" "); -@@ -1978,6 +2023,15 @@ void LIR_Op3::print_instr(outputStream* out) const { +@@ -1978,6 +2068,17 @@ void LIR_Op3::print_instr(outputStream* out) const { result_opr()->print(out); } ++#ifdef RISCV +// LIR_Op4 +void LIR_Op4::print_instr(outputStream* out) const { + print_condition(out, condition()); out->print(" "); @@ -58272,11 +53897,12 @@ index e30d39f73d1..733ee9e654c 100644 + in_opr4()->print(out); out->print(" "); + result_opr()->print(out); +} ++#endif void LIR_OpLock::print_instr(outputStream* out) const { hdr_opr()->print(out); out->print(" "); diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp -index 3234ca018b7..efff6bf7a30 100644 +index 3234ca018b..33943e369d 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -1,5 +1,5 @@ @@ -58286,52 +53912,62 @@ index 3234ca018b7..efff6bf7a30 100644 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -867,6 +867,7 @@ class LIR_Op2; +@@ -867,6 +867,9 @@ class LIR_Op2; class LIR_OpDelay; class LIR_Op3; class LIR_OpAllocArray; ++#ifdef RISCV +class LIR_Op4; ++#endif class LIR_OpCall; class LIR_OpJavaCall; class LIR_OpRTCall; -@@ -916,8 +917,6 @@ enum LIR_Code { +@@ -916,8 +919,10 @@ enum LIR_Code { , lir_null_check , lir_return , lir_leal -- , lir_branch -- , lir_cond_float_branch ++#ifndef RISCV + , lir_branch + , lir_cond_float_branch ++#endif , lir_move , lir_convert , lir_alloc_object -@@ -929,11 +928,12 @@ enum LIR_Code { +@@ -929,11 +934,17 @@ enum LIR_Code { , lir_unwind , end_op1 , begin_op2 ++#ifdef RISCV + , lir_branch + , lir_cond_float_branch ++#endif , lir_cmp , lir_cmp_l2i , lir_ucmp_fd2i , lir_cmp_fd2i -- , lir_cmove ++#ifndef RISCV + , lir_cmove ++#endif , lir_add , lir_sub , lir_mul -@@ -964,6 +964,9 @@ enum LIR_Code { +@@ -964,6 +975,11 @@ enum LIR_Code { , lir_fmad , lir_fmaf , end_op3 ++#ifdef RISCV + , begin_op4 + , lir_cmove + , end_op4 ++#endif , begin_opJavaCall , lir_static_call , lir_optvirtual_call -@@ -1001,6 +1004,11 @@ enum LIR_Code { +@@ -1001,6 +1017,11 @@ enum LIR_Code { , begin_opAssert , lir_assert , end_opAssert -+#ifdef INCLUDE_ZGC ++#if defined(RISCV) && defined(INCLUDE_ZGC) + , begin_opZLoadBarrierTest + , lir_zloadbarrier_test + , end_opZLoadBarrierTest @@ -58339,15 +53975,17 @@ index 3234ca018b7..efff6bf7a30 100644 }; -@@ -1134,6 +1142,7 @@ class LIR_Op: public CompilationResourceObj { +@@ -1134,6 +1155,9 @@ class LIR_Op: public CompilationResourceObj { virtual LIR_Op1* as_Op1() { return NULL; } virtual LIR_Op2* as_Op2() { return NULL; } virtual LIR_Op3* as_Op3() { return NULL; } ++#ifdef RISCV + virtual LIR_Op4* as_Op4() { return NULL; } ++#endif virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } -@@ -1410,51 +1419,6 @@ class LIR_OpRTCall: public LIR_OpCall { +@@ -1410,51 +1434,6 @@ class LIR_OpRTCall: public LIR_OpCall { 
virtual void verify() const; }; @@ -58399,7 +54037,7 @@ index 3234ca018b7..efff6bf7a30 100644 class ConversionStub; class LIR_OpConvert: public LIR_Op1 { -@@ -1614,19 +1578,19 @@ class LIR_Op2: public LIR_Op { +@@ -1614,19 +1593,19 @@ class LIR_Op2: public LIR_Op { void verify() const; public: @@ -58420,11 +54058,11 @@ index 3234ca018b7..efff6bf7a30 100644 - assert(code == lir_cmp || code == lir_assert, "code check"); + , _tmp5(LIR_OprFact::illegalOpr) + , _condition(condition) { -+ assert(code == lir_cmp || code == lir_branch || code == lir_cond_float_branch || code == lir_assert, "code check"); ++ assert(code == lir_cmp || code == lir_assert RISCV_ONLY(|| code == lir_branch || code == lir_cond_float_branch), "code check"); } LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) -@@ -1651,14 +1615,14 @@ class LIR_Op2: public LIR_Op { +@@ -1651,14 +1630,14 @@ class LIR_Op2: public LIR_Op { , _opr1(opr1) , _opr2(opr2) , _type(type) @@ -58438,11 +54076,11 @@ index 3234ca018b7..efff6bf7a30 100644 - assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); + , _tmp5(LIR_OprFact::illegalOpr) + , _condition(lir_cond_unknown) { -+ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert(code != lir_cmp && RISCV_ONLY(code != lir_branch && code != lir_cond_float_branch &&) is_in_range(code, begin_op2, end_op2), "code check"); } LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, -@@ -1667,14 +1631,14 @@ class LIR_Op2: public LIR_Op { +@@ -1667,14 +1646,14 @@ class LIR_Op2: public LIR_Op { , _opr1(opr1) , _opr2(opr2) , _type(T_ILLEGAL) @@ -58456,31 +54094,45 @@ index 3234ca018b7..efff6bf7a30 100644 - assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); + , _tmp5(tmp5) + , _condition(lir_cond_unknown) { -+ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert(code != lir_cmp && RISCV_ONLY(code != lir_branch && code != lir_cond_float_branch &&) is_in_range(code, begin_op2, end_op2), "code check"); } LIR_Opr in_opr1() const { return _opr1; } -@@ -1686,10 +1650,10 @@ class LIR_Op2: public LIR_Op { +@@ -1686,10 +1665,18 @@ class LIR_Op2: public LIR_Op { LIR_Opr tmp4_opr() const { return _tmp4; } LIR_Opr tmp5_opr() const { return _tmp5; } LIR_Condition condition() const { -- assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; ++#ifdef RISCV + assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; ++#else + assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; ++#endif } void set_condition(LIR_Condition condition) { -- assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; ++#ifdef RISCV + assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for branch"); _condition = condition; ++#else + assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; ++#endif } void set_fpu_stack_size(int size) { _fpu_stack_size = size; } -@@ -1703,6 +1667,51 @@ class 
LIR_Op2: public LIR_Op { +@@ -1703,6 +1690,65 @@ class LIR_Op2: public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; ++#ifdef RISCV +class LIR_OpBranch: public LIR_Op2 { ++#else ++class LIR_OpBranch: public LIR_Op { ++#endif + friend class LIR_OpVisitState; + + private: ++#ifndef RISCV ++ LIR_Condition _cond; ++ BasicType _type; ++#endif + Label* _label; + BlockBegin* _block; // if this is a branch to a block, this is the block + BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block @@ -58488,7 +54140,13 @@ index 3234ca018b7..efff6bf7a30 100644 + + public: + LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) ++#ifdef RISCV + : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL, type) ++#else ++ : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) ++ , _cond(cond) ++ , _type(type) ++#endif + , _label(lbl) + , _block(NULL) + , _ublock(NULL) @@ -58500,14 +54158,14 @@ index 3234ca018b7..efff6bf7a30 100644 + // for unordered comparisons + LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock); + -+ LIR_Condition cond() const { -+ return condition(); -+ } -+ -+ void set_cond(LIR_Condition cond) { -+ set_condition(cond); -+ } -+ ++#ifdef RISCV ++ LIR_Condition cond() const { return condition(); } ++ void set_cond(LIR_Condition cond) { set_condition(cond); } ++#else ++ LIR_Condition cond() const { return _cond; } ++ void set_cond(LIR_Condition cond) { _cond = cond; } ++#endif ++ BasicType type() const { return _type; } + Label* label() const { return _label; } + BlockBegin* block() const { return _block; } + BlockBegin* ublock() const { return _ublock; } @@ -58525,10 +54183,11 @@ index 3234ca018b7..efff6bf7a30 100644 class LIR_OpAllocArray : public LIR_Op { friend class LIR_OpVisitState; -@@ -1766,6 +1775,63 @@ class LIR_Op3: public LIR_Op { +@@ -1766,6 +1812,65 @@ class LIR_Op3: public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; ++#ifdef RISCV +class LIR_Op4: public LIR_Op { + friend class LIR_OpVisitState; + protected: @@ -58586,10 +54245,11 @@ index 3234ca018b7..efff6bf7a30 100644 + + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +}; ++#endif //-------------------------------- class LabelObj: public CompilationResourceObj { -@@ -1988,6 +2054,10 @@ class LIR_List: public CompilationResourceObj { +@@ -1988,6 +2093,10 @@ class LIR_List: public CompilationResourceObj { const char * _file; int _line; #endif @@ -58600,7 +54260,7 @@ index 3234ca018b7..efff6bf7a30 100644 public: void append(LIR_Op* op) { -@@ -2000,6 +2070,12 @@ class LIR_List: public CompilationResourceObj { +@@ -2000,6 +2109,12 @@ class LIR_List: public CompilationResourceObj { } #endif // PRODUCT @@ -58613,7 +54273,7 @@ index 3234ca018b7..efff6bf7a30 100644 _operations.append(op); #ifdef ASSERT -@@ -2016,6 +2092,10 @@ class LIR_List: public CompilationResourceObj { +@@ -2016,6 +2131,10 @@ class LIR_List: public CompilationResourceObj { void set_file_and_line(const char * file, int line); #endif @@ -58624,37 +54284,44 @@ index 3234ca018b7..efff6bf7a30 100644 //---------- accessors --------------- LIR_OpList* instructions_list() { return &_operations; } int length() const { return _operations.length(); } -@@ -2149,8 +2229,9 @@ class LIR_List: public CompilationResourceObj { +@@ -2149,9 +2268,16 @@ class LIR_List: public CompilationResourceObj { void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, 
CodeEmitInfo* info); void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); -- void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { -- append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); ++#ifdef RISCV + void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type, + LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr) { + append(new LIR_Op4(lir_cmove, condition, src1, src2, cmp_opr1, cmp_opr2, dst, type)); ++ } ++#else + void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { + append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); } ++#endif void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, + LIR_Opr t1, LIR_Opr t2, LIR_Opr result = LIR_OprFact::illegalOpr); diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp -index 160483d5f74..42a0350f7d9 100644 +index 160483d5f7..68aec26c1e 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp -@@ -709,10 +709,6 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { +@@ -709,9 +709,11 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op); break; -- case lir_cmove: -- cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type()); -- break; -- ++#ifndef RISCV + case lir_cmove: + cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type()); + break; ++#endif + case lir_shl: case lir_shr: - case lir_ushr: -@@ -776,6 +772,17 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { +@@ -776,6 +778,19 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { } } ++#ifdef RISCV +void LIR_Assembler::emit_op4(LIR_Op4* op) { + switch(op->code()) { + case lir_cmove: @@ -58666,34 +54333,40 @@ index 160483d5f74..42a0350f7d9 100644 + break; + } +} ++#endif void LIR_Assembler::build_frame() { _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp -index 44a5bcbe542..c677bd346fc 100644 +index 44a5bcbe54..baeb4aa442 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp -@@ -190,6 +190,7 @@ class LIR_Assembler: public CompilationResourceObj { +@@ -190,6 +190,9 @@ class LIR_Assembler: public CompilationResourceObj { void emit_op1(LIR_Op1* op); void emit_op2(LIR_Op2* op); void emit_op3(LIR_Op3* op); ++#ifdef RISCV + void emit_op4(LIR_Op4* op); ++#endif void emit_opBranch(LIR_OpBranch* op); void emit_opLabel(LIR_OpLabel* op); void emit_arraycopy(LIR_OpArrayCopy* op); -@@ -222,8 +223,8 @@ class LIR_Assembler: public CompilationResourceObj { +@@ -222,8 +225,12 @@ class LIR_Assembler: public CompilationResourceObj { void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); -- void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); -- ++#ifdef RISCV + void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr); ++#else + void 
cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); +- ++#endif void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); void ic_call( LIR_OpJavaCall* op); void vtable_call( LIR_OpJavaCall* op); diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp -index c28055fd996..a4dfe8552ae 100644 +index acc969ac9c..512b63c744 100644 --- a/src/hotspot/share/c1/c1_LinearScan.cpp +++ b/src/hotspot/share/c1/c1_LinearScan.cpp @@ -1,5 +1,5 @@ @@ -58703,22 +54376,21 @@ index c28055fd996..a4dfe8552ae 100644 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -1242,11 +1242,11 @@ void LinearScan::add_register_hints(LIR_Op* op) { +@@ -1242,8 +1242,13 @@ void LinearScan::add_register_hints(LIR_Op* op) { break; } case lir_cmove: { -- assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2"); -- LIR_Op2* cmove = (LIR_Op2*)op; ++#ifdef RISCV + assert(op->as_Op4() != NULL, "lir_cmove must be LIR_Op4"); + LIR_Op4* cmove = (LIR_Op4*)op; ++#else + assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2"); + LIR_Op2* cmove = (LIR_Op2*)op; ++#endif LIR_Opr move_from = cmove->in_opr1(); -- LIR_Opr move_to = cmove->result_opr(); -+ LIR_Opr move_to = cmove->result_opr(); - - if (move_to->is_register() && move_from->is_register()) { - Interval* from = interval_at(reg_num(move_from)); -@@ -3140,6 +3140,9 @@ void LinearScan::do_linear_scan() { + LIR_Opr move_to = cmove->result_opr(); +@@ -3148,6 +3153,9 @@ void LinearScan::do_linear_scan() { } } @@ -58728,7 +54400,7 @@ index c28055fd996..a4dfe8552ae 100644 { TIME_LINEAR_SCAN(timer_optimize_lir); EdgeMoveOptimizer::optimize(ir()->code()); -@@ -3147,6 +3150,7 @@ void LinearScan::do_linear_scan() { +@@ -3155,6 +3163,7 @@ void LinearScan::do_linear_scan() { // check that cfg is still correct after optimizations ir()->verify(); } @@ -58736,26 +54408,32 @@ index c28055fd996..a4dfe8552ae 100644 NOT_PRODUCT(print_lir(1, "Before Code Generation", false)); NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_final)); -@@ -6284,14 +6288,14 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { +@@ -6292,14 +6301,23 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { // There might be a cmove inserted for profiling which depends on the same // compare. If we change the condition of the respective compare, we have // to take care of this cmove as well. 
-- LIR_Op2* prev_cmove = NULL; ++#ifdef RISCV + LIR_Op4* prev_cmove = NULL; ++#else + LIR_Op2* prev_cmove = NULL; ++#endif for(int j = instructions->length() - 3; j >= 0 && prev_cmp == NULL; j--) { prev_op = instructions->at(j); // check for the cmove if (prev_op->code() == lir_cmove) { -- assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2"); -- prev_cmove = (LIR_Op2*)prev_op; ++#ifdef RISCV + assert(prev_op->as_Op4() != NULL, "cmove must be of type LIR_Op4"); + prev_cmove = (LIR_Op4*)prev_op; ++#else + assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2"); + prev_cmove = (LIR_Op2*)prev_op; ++#endif assert(prev_branch->cond() == prev_cmove->condition(), "should be the same"); } if (prev_op->code() == lir_cmp) { diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp -index 4771a8b8652..6d377fa005d 100644 +index 4771a8b865..6d377fa005 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp @@ -1,5 +1,5 @@ @@ -58775,7 +54453,7 @@ index 4771a8b8652..6d377fa005d 100644 #endif diff --git a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp -index 9f8ce742433..f36dd612eff 100644 +index 9f8ce74243..125cc169be 100644 --- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp @@ -1,5 +1,5 @@ @@ -58785,17 +54463,20 @@ index 9f8ce742433..f36dd612eff 100644 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -100,7 +100,7 @@ class LIR_OpZLoadBarrierTest : public LIR_Op { +@@ -100,7 +100,11 @@ private: public: LIR_OpZLoadBarrierTest(LIR_Opr opr) : -- LIR_Op(), ++#ifdef RISCV + LIR_Op(lir_zloadbarrier_test, LIR_OprFact::illegalOpr, NULL), ++#else + LIR_Op(), ++#endif _opr(opr) {} virtual void visit(LIR_OpVisitState* state) { diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp -index e01a242a57e..ff16de0e778 100644 +index e01a242a57..ff16de0e77 100644 --- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp @@ -102,7 +102,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { @@ -58807,19 +54488,8 @@ index e01a242a57e..ff16de0e778 100644 return false; #else #warning "Unconfigured platform" -diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp -index c64d0879592..bc856d4b617 100644 ---- a/src/hotspot/share/opto/regmask.hpp -+++ b/src/hotspot/share/opto/regmask.hpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/share/runtime/abstract_vm_version.cpp b/src/hotspot/share/runtime/abstract_vm_version.cpp -index c46247f2bdb..b5e64b65ff7 100644 +index a383297611..5e9228e705 100644 --- a/src/hotspot/share/runtime/abstract_vm_version.cpp +++ b/src/hotspot/share/runtime/abstract_vm_version.cpp @@ -196,7 +196,8 @@ const char* Abstract_VM_Version::jre_release_version() { @@ -58832,22 +54502,11 @@ index c46247f2bdb..b5e64b65ff7 100644 #endif // !ZERO #endif // !CPU -diff --git a/src/hotspot/share/runtime/synchronizer.cpp b/src/hotspot/share/runtime/synchronizer.cpp -index e7b32723e47..434826853ee 100644 ---- a/src/hotspot/share/runtime/synchronizer.cpp -+++ b/src/hotspot/share/runtime/synchronizer.cpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp -index aa914eccafc..a2f98e6a251 100644 +index 34c8d98362..7cf95058fe 100644 --- a/src/hotspot/share/runtime/thread.hpp +++ b/src/hotspot/share/runtime/thread.hpp -@@ -1234,7 +1234,7 @@ class JavaThread: public Thread { +@@ -1259,7 +1259,7 @@ class JavaThread: public Thread { address last_Java_pc(void) { return _anchor.last_Java_pc(); } // Safepoint support @@ -58857,7 +54516,7 @@ index aa914eccafc..a2f98e6a251 100644 void set_thread_state(JavaThreadState s) { assert(current_or_null() == NULL || current_or_null() == this, diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp -index dee8534f739..9af07aeb459 100644 +index dee8534f73..9af07aeb45 100644 --- a/src/hotspot/share/runtime/thread.inline.hpp +++ b/src/hotspot/share/runtime/thread.inline.hpp @@ -1,5 +1,5 @@ @@ -58877,10 +54536,10 @@ index dee8534f739..9af07aeb459 100644 return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state); } diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp -index cf802538689..e8ab3097ac7 100644 +index 6605ab367c..7f1bcff6b3 100644 --- a/src/hotspot/share/utilities/macros.hpp +++ b/src/hotspot/share/utilities/macros.hpp -@@ -597,6 +597,32 @@ +@@ -601,6 +601,32 @@ #define MACOS_AARCH64_ONLY(x) MACOS_ONLY(AARCH64_ONLY(x)) @@ -58914,7 +54573,7 @@ index cf802538689..e8ab3097ac7 100644 #define LITTLE_ENDIAN_ONLY(code) code #define BIG_ENDIAN_ONLY(code) diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -index 0d834302c57..45a927fb5ee 100644 +index 0d834302c5..45a927fb5e 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c @@ -1,5 +1,5 @@ @@ -59000,7 +54659,7 @@ index 0d834302c57..45a927fb5ee 100644 #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h -index 8318e8e0213..ab092d4ee33 100644 +index 8318e8e021..ab092d4ee3 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h @@ -1,5 +1,5 @@ @@ -59020,10 
+54679,10 @@ index 8318e8e0213..ab092d4ee33 100644 // This C bool type must be int for compatibility with Linux calls and diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -index 0f5f0119c73..9bff9ee9b15 100644 +index 0f5f0119c7..9bff9ee9b1 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -@@ -36,6 +36,7 @@ +@@ -36,6 +36,7 @@ import sun.jvm.hotspot.debugger.MachineDescription; import sun.jvm.hotspot.debugger.MachineDescriptionAMD64; import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; @@ -59031,7 +54690,7 @@ index 0f5f0119c73..9bff9ee9b15 100644 import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; -@@ -598,6 +599,8 @@ private void setupDebuggerLinux() { +@@ -598,6 +599,8 @@ public class HotSpotAgent { } else { machDesc = new MachineDescriptionSPARC32Bit(); } @@ -59042,7 +54701,7 @@ index 0f5f0119c73..9bff9ee9b15 100644 machDesc = (MachineDescription) diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java new file mode 100644 -index 00000000000..a972516dee3 +index 0000000000..a972516dee --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java @@ -0,0 +1,40 @@ @@ -59087,7 +54746,7 @@ index 00000000000..a972516dee3 + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -index 5e5a6bb7141..dc0bcb3da94 100644 +index 5e5a6bb714..dc0bcb3da9 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java @@ -1,5 +1,5 @@ @@ -59097,7 +54756,7 @@ index 5e5a6bb7141..dc0bcb3da94 100644 * Copyright (c) 2015, Red Hat Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* -@@ -34,12 +34,14 @@ +@@ -34,12 +34,14 @@ import sun.jvm.hotspot.debugger.x86.*; import sun.jvm.hotspot.debugger.amd64.*; import sun.jvm.hotspot.debugger.aarch64.*; import sun.jvm.hotspot.debugger.sparc.*; @@ -59112,7 +54771,7 @@ index 5e5a6bb7141..dc0bcb3da94 100644 import sun.jvm.hotspot.utilities.*; class LinuxCDebugger implements CDebugger { -@@ -116,7 +118,14 @@ public CFrame topFrameForThread(ThreadProxy thread) throws DebuggerException { +@@ -116,7 +118,14 @@ class LinuxCDebugger implements CDebugger { Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); if (pc == null) return null; return new LinuxAARCH64CFrame(dbg, fp, pc); @@ -59130,7 +54789,7 @@ index 5e5a6bb7141..dc0bcb3da94 100644 return context.getTopFrame(dbg); diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java new file mode 100644 -index 00000000000..f06da24bd0e +index 0000000000..f06da24bd0 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java @@ -0,0 +1,90 @@ @@ -59226,7 +54885,7 @@ index 00000000000..f06da24bd0e +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java new file mode 100644 -index 00000000000..fdb841ccf3d +index 0000000000..fdb841ccf3 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java @@ -0,0 +1,48 @@ @@ -59280,7 +54939,7 @@ index 00000000000..fdb841ccf3d +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java new file mode 100644 -index 00000000000..96d5dee47ce +index 0000000000..96d5dee47c --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java @@ -0,0 +1,88 @@ @@ -59374,7 +55033,7 @@ index 00000000000..96d5dee47ce +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java new file mode 100644 -index 00000000000..f2aa845e665 +index 0000000000..f2aa845e66 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java @@ -0,0 +1,48 @@ @@ -59428,7 +55087,7 @@ index 00000000000..f2aa845e665 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java new file mode 100644 -index 00000000000..19f64b8ce2d +index 0000000000..19f64b8ce2 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java @@ -0,0 +1,46 @@ @@ -59480,7 +55139,7 @@ index 00000000000..19f64b8ce2d +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java new file mode 100644 -index 00000000000..aecbda59023 +index 0000000000..aecbda5902 --- 
/dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java @@ -0,0 +1,55 @@ @@ -59541,7 +55200,7 @@ index 00000000000..aecbda59023 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java new file mode 100644 -index 00000000000..1d3da6be5af +index 0000000000..1d3da6be5a --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java @@ -0,0 +1,48 @@ @@ -59595,7 +55254,7 @@ index 00000000000..1d3da6be5af +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java new file mode 100644 -index 00000000000..725b94e25a3 +index 0000000000..725b94e25a --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java @@ -0,0 +1,46 @@ @@ -59645,11 +55304,11 @@ index 00000000000..725b94e25a3 + return new RemoteRISCV64Thread(debugger, id); + } +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java new file mode 100644 -index 00000000000..fb60a70427a +index 0000000000..fb60a70427 --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. @@ -59824,7 +55483,7 @@ index 00000000000..fb60a70427a + public abstract Address getRegisterAsAddress(int index); +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java -index 190062785a7..89d676fe3b9 100644 +index 190062785a..89d676fe3b 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java @@ -1,5 +1,5 @@ @@ -59834,7 +55493,7 @@ index 190062785a7..89d676fe3b9 100644 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -38,6 +38,7 @@ +@@ -38,6 +38,7 @@ import sun.jvm.hotspot.runtime.win32_aarch64.Win32AARCH64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; @@ -59842,7 +55501,7 @@ index 190062785a7..89d676fe3b9 100644 import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; -@@ -99,6 +100,8 @@ private static synchronized void initialize(TypeDataBase db) { +@@ -99,6 +100,8 @@ public class Threads { access = new LinuxPPC64JavaThreadPDAccess(); } else if (cpu.equals("aarch64")) { access = new LinuxAARCH64JavaThreadPDAccess(); @@ -59853,10 +55512,10 @@ index 190062785a7..89d676fe3b9 100644 access = (JavaThreadPDAccess) diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java new file mode 100644 -index 00000000000..f2e224f28ee +index 0000000000..5c2b6e0e3e --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java -@@ -0,0 +1,134 @@ +@@ -0,0 +1,132 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. @@ -59893,8 +55552,6 @@ index 00000000000..f2e224f28ee +import sun.jvm.hotspot.runtime.riscv64.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; -+import sun.jvm.hotspot.utilities.Observable; -+import sun.jvm.hotspot.utilities.Observer; + +public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField lastJavaFPField; @@ -59993,7 +55650,7 @@ index 00000000000..f2e224f28ee +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java new file mode 100644 -index 00000000000..34701c6922f +index 0000000000..34701c6922 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java @@ -0,0 +1,223 @@ @@ -60222,10 +55879,10 @@ index 00000000000..34701c6922f +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java new file mode 100644 -index 00000000000..df280005d72 +index 0000000000..e372bc5f7b --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java -@@ -0,0 +1,556 @@ +@@ -0,0 +1,554 @@ +/* + * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Red Hat Inc. @@ -60262,8 +55919,6 @@ index 00000000000..df280005d72 +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; -+import sun.jvm.hotspot.utilities.Observable; -+import sun.jvm.hotspot.utilities.Observer; + +/** Specialization of and implementation of abstract methods of the + Frame class for the riscv64 family of CPUs. 
*/ @@ -60596,19951 +56251,375 @@ index 00000000000..df280005d72 + } + + private Frame senderForCompiledFrame(RISCV64RegisterMap map, CodeBlob cb) { -+ if (DEBUG) { -+ System.out.println("senderForCompiledFrame"); -+ } -+ -+ // -+ // NOTE: some of this code is (unfortunately) duplicated RISCV64CurrentFrameGuess -+ // -+ -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(map != null, "map must be set"); -+ } -+ -+ // frame owned by optimizing compiler -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); -+ } -+ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); -+ -+ // The return_address is always the word on the stack -+ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); -+ -+ // This is the saved value of FP which may or may not really be an FP. -+ // It is only an FP if the sender is an interpreter frame. -+ Address savedFPAddr = senderSP.addOffsetTo(-2 * VM.getVM().getAddressSize()); -+ -+ if (map.getUpdateMap()) { -+ // Tell GC to use argument oopmaps for some runtime stubs that need it. -+ // For C1, the runtime stub might not have oop maps, so set this flag -+ // outside of update_register_map. -+ map.setIncludeArgumentOops(cb.callerMustGCArguments()); -+ -+ if (cb.getOopMaps() != null) { -+ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); -+ } -+ -+ // Since the prolog does the save and restore of FP there is no oopmap -+ // for it so we must fill in its location as if there was an oopmap entry -+ // since if our caller was compiled code there could be live jvm state in it. -+ updateMapWithSavedLink(map, savedFPAddr); -+ } -+ -+ return new RISCV64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); -+ } -+ -+ protected boolean hasSenderPD() { -+ return true; -+ } -+ -+ public long frameSize() { -+ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); -+ } -+ -+ public Address getLink() { -+ try { -+ if (DEBUG) { -+ System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) -+ + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); -+ } -+ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); -+ } catch (Exception e) { -+ if (DEBUG) -+ System.out.println("Returning null"); -+ return null; -+ } -+ } -+ -+ public Address getUnextendedSP() { return raw_unextendedSP; } -+ -+ // Return address: -+ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } -+ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } -+ -+ // return address of param, zero origin index. -+ public Address getNativeParamAddr(int idx) { -+ return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); -+ } -+ -+ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } -+ -+ public Address addressOfInterpreterFrameLocals() { -+ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); -+ } -+ -+ private Address addressOfInterpreterFrameBCX() { -+ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); -+ } -+ -+ public int getInterpreterFrameBCI() { -+ // FIXME: this is not atomic with respect to GC and is unsuitable -+ // for use in a non-debugging, or reflective, system. Need to -+ // figure out how to express this. 
-+ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); -+ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); -+ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); -+ return bcpToBci(bcp, method); -+ } -+ -+ public Address addressOfInterpreterFrameMDX() { -+ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); -+ } -+ -+ // expression stack -+ // (the max_stack arguments are used by the GC; see class FrameClosure) -+ -+ public Address addressOfInterpreterFrameExpressionStack() { -+ Address monitorEnd = interpreterFrameMonitorEnd().address(); -+ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); -+ } -+ -+ public int getInterpreterFrameExpressionStackDirection() { return -1; } -+ -+ // top of expression stack -+ public Address addressOfInterpreterFrameTOS() { -+ return getSP(); -+ } -+ -+ /** Expression stack from top down */ -+ public Address addressOfInterpreterFrameTOSAt(int slot) { -+ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); -+ } -+ -+ public Address getInterpreterFrameSenderSP() { -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(isInterpretedFrame(), "interpreted frame expected"); -+ } -+ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); -+ } -+ -+ // Monitors -+ public BasicObjectLock interpreterFrameMonitorBegin() { -+ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); -+ } -+ -+ public BasicObjectLock interpreterFrameMonitorEnd() { -+ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); -+ if (Assert.ASSERTS_ENABLED) { -+ // make sure the pointer points inside the frame -+ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); -+ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); -+ } -+ return new BasicObjectLock(result); -+ } -+ -+ public int interpreterFrameMonitorSize() { -+ return BasicObjectLock.size(); -+ } -+ -+ // Method -+ public Address addressOfInterpreterFrameMethod() { -+ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); -+ } -+ -+ // Constant pool cache -+ public Address addressOfInterpreterFrameCPCache() { -+ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); -+ } -+ -+ // Entry frames -+ public JavaCallWrapper getEntryFrameCallWrapper() { -+ return new RISCV64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); -+ } -+ -+ protected Address addressOfSavedOopResult() { -+ // offset is 2 for compiler2 and 3 for compiler1 -+ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * -+ VM.getVM().getAddressSize()); -+ } -+ -+ protected Address addressOfSavedReceiver() { -+ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); -+ } -+ -+ private void dumpStack() { -+ for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); -+ AddressOps.lt(addr, getSP()); -+ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { -+ System.out.println(addr + ": " + addr.getAddressAt(0)); -+ } -+ System.out.println("-----------------------"); -+ for (Address addr = getSP(); -+ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); -+ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { -+ System.out.println(addr + ": " + addr.getAddressAt(0)); -+ } -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -new file mode 100644 -index 00000000000..d0ad2b559a6 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+package sun.jvm.hotspot.runtime.riscv64; -+ -+import java.util.*; -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.types.*; -+import sun.jvm.hotspot.runtime.*; -+import sun.jvm.hotspot.utilities.*; -+import sun.jvm.hotspot.utilities.Observable; -+import sun.jvm.hotspot.utilities.Observer; -+ -+public class RISCV64JavaCallWrapper extends JavaCallWrapper { -+ private static AddressField lastJavaFPField; -+ -+ static { -+ VM.registerVMInitializedObserver(new Observer() { -+ public void update(Observable o, Object data) { -+ initialize(VM.getVM().getTypeDataBase()); -+ } -+ }); -+ } -+ -+ private static synchronized void initialize(TypeDataBase db) { -+ Type type = db.lookupType("JavaFrameAnchor"); -+ -+ lastJavaFPField = type.getAddressField("_last_Java_fp"); -+ } -+ -+ public RISCV64JavaCallWrapper(Address addr) { -+ super(addr); -+ } -+ -+ public Address getLastJavaFP() { -+ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java -new file mode 100644 -index 00000000000..4aeb1c6f557 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java -@@ -0,0 +1,53 @@ -+/* -+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+package sun.jvm.hotspot.runtime.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.runtime.*; -+ -+public class RISCV64RegisterMap extends RegisterMap { -+ -+ /** This is the only public constructor */ -+ public RISCV64RegisterMap(JavaThread thread, boolean updateMap) { -+ super(thread, updateMap); -+ } -+ -+ protected RISCV64RegisterMap(RegisterMap map) { -+ super(map); -+ } -+ -+ public Object clone() { -+ RISCV64RegisterMap retval = new RISCV64RegisterMap(this); -+ return retval; -+ } -+ -+ // no PD state to clear or copy: -+ protected void clearPD() {} -+ protected void initializePD() {} -+ protected void initializeFromPD(RegisterMap map) {} -+ protected Address getLocationPD(VMReg reg) { return null; } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -index 7d7a6107cab..6552ce255fc 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -54,7 +54,7 @@ public static String getOS() throws UnsupportedPlatformException { - - public static boolean knownCPU(String cpu) { - final String[] KNOWN = -- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; -+ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "riscv64"}; - - for(String s : KNOWN) { - if(s.equals(cpu)) -diff --git a/test/hotspot/jtreg/compiler/c2/TestBit.java b/test/hotspot/jtreg/compiler/c2/TestBit.java -index 7805918c28a..823b9f39dbf 100644 ---- a/test/hotspot/jtreg/compiler/c2/TestBit.java -+++ b/test/hotspot/jtreg/compiler/c2/TestBit.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -34,7 +34,7 @@ - * - * @run driver compiler.c2.TestBit - * -- * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" -+ * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" | os.arch == "riscv64" - * @requires vm.debug == true & vm.compiler2.enabled - */ - public class TestBit { -@@ -54,7 +54,8 @@ static void runTest(String testName) throws Exception { - String expectedTestBitInstruction = - "ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" : - "aarch64".equals(System.getProperty("os.arch")) ? "tb" : -- "amd64".equals(System.getProperty("os.arch")) ? "test" : null; -+ "amd64".equals(System.getProperty("os.arch")) ? "test" : -+ "riscv64".equals(System.getProperty("os.arch")) ? 
"andi" : null; - - if (expectedTestBitInstruction != null) { - output.shouldContain(expectedTestBitInstruction); -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -index 558b4218f0b..55374b116e6 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -42,6 +42,7 @@ - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; - -@@ -54,6 +55,8 @@ public static void main(String args[]) throws Throwable { - SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), - new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -index 3ed72bf0a99..8fb82ee4531 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -42,6 +42,7 @@ - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; - -@@ -54,6 +55,8 @@ public static void main(String args[]) throws Throwable { - SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), - new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -index c05cf309dae..aca32137eda 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -42,6 +42,7 @@ - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; - -@@ -54,6 +55,8 @@ public static void main(String args[]) throws Throwable { - SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), - new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -index 58ce5366bae..8deac4f7895 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -41,6 +41,7 @@ - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU; - -@@ -53,6 +54,8 @@ public static void main(String args[]) throws Throwable { - SHAOptionsBase.USE_SHA_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA_OPTION), - new UseSHASpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -index faa9fdbae67..26635002040 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -32,26 +32,27 @@ - - /** - * Generic test case for SHA-related options targeted to any CPU except -- * AArch64, PPC, S390x, SPARC and X86. -+ * AArch64, RISCV64, PPC, S390x, SPARC and X86. - */ - public class GenericTestCaseForOtherCPU extends - SHAOptionsBase.TestCase { - public GenericTestCaseForOtherCPU(String optionName) { -- // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC and X86. -+ // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, SPARC and X86. - super(optionName, new NotPredicate( - new OrPredicate(Platform::isAArch64, -+ new OrPredicate(Platform::isRISCV64, - new OrPredicate(Platform::isS390x, - new OrPredicate(Platform::isSparc, - new OrPredicate(Platform::isPPC, - new OrPredicate(Platform::isX64, -- Platform::isX86))))))); -+ Platform::isX86)))))))); - } - - @Override - protected void verifyWarnings() throws Throwable { - String shouldPassMessage = String.format("JVM should start with " - + "option '%s' without any warnings", optionName); -- // Verify that on non-x86, non-SPARC and non-AArch64 CPU usage of -+ // Verify that on non-x86, non-RISCV64, non-SPARC and non-AArch64 CPU usage of - // SHA-related options will not cause any warnings. 
- CommandLineOptionTest.verifySameJVMStartup(null, - new String[] { ".*" + optionName + ".*" }, shouldPassMessage, -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -new file mode 100644 -index 00000000000..2ecfec07a4c ---- /dev/null -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -@@ -0,0 +1,115 @@ -+/* -+ * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ */ -+ -+package compiler.intrinsics.sha.cli.testcases; -+ -+import compiler.intrinsics.sha.cli.DigestOptionsBase; -+import jdk.test.lib.process.ExitCode; -+import jdk.test.lib.Platform; -+import jdk.test.lib.cli.CommandLineOptionTest; -+import jdk.test.lib.cli.predicate.AndPredicate; -+import jdk.test.lib.cli.predicate.NotPredicate; -+ -+/** -+ * Generic test case for SHA-related options targeted to RISCV64 CPUs -+ * which don't support instruction required by the tested option. -+ */ -+public class GenericTestCaseForUnsupportedRISCV64CPU extends -+ DigestOptionsBase.TestCase { -+ -+ final private boolean checkUseSHA; -+ -+ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { -+ this(optionName, true); -+ } -+ -+ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { -+ super(optionName, new AndPredicate(Platform::isRISCV64, -+ new NotPredicate(DigestOptionsBase.getPredicateForOption( -+ optionName)))); -+ -+ this.checkUseSHA = checkUseSHA; -+ } -+ -+ @Override -+ protected void verifyWarnings() throws Throwable { -+ String shouldPassMessage = String.format("JVM startup should pass with" -+ + "option '-XX:-%s' without any warnings", optionName); -+ //Verify that option could be disabled without any warnings. 
-+ CommandLineOptionTest.verifySameJVMStartup(null, new String[] { -+ DigestOptionsBase.getWarningForUnsupportedCPU(optionName) -+ }, shouldPassMessage, shouldPassMessage, ExitCode.OK, -+ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(optionName, false)); -+ -+ if (checkUseSHA) { -+ shouldPassMessage = String.format("If JVM is started with '-XX:-" -+ + "%s' '-XX:+%s', output should contain warning.", -+ DigestOptionsBase.USE_SHA_OPTION, optionName); -+ -+ // Verify that when the tested option is enabled, then -+ // a warning will occur in VM output if UseSHA is disabled. -+ if (!optionName.equals(DigestOptionsBase.USE_SHA_OPTION)) { -+ CommandLineOptionTest.verifySameJVMStartup( -+ new String[] { DigestOptionsBase.getWarningForUnsupportedCPU(optionName) }, -+ null, -+ shouldPassMessage, -+ shouldPassMessage, -+ ExitCode.OK, -+ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(DigestOptionsBase.USE_SHA_OPTION, false), -+ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); -+ } -+ } -+ } -+ -+ @Override -+ protected void verifyOptionValues() throws Throwable { -+ // Verify that option is disabled by default. -+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be disabled by default", -+ optionName), -+ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); -+ -+ if (checkUseSHA) { -+ // Verify that option is disabled even if it was explicitly enabled -+ // using CLI options. -+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be off on unsupported " -+ + "RISCV64CPU even if set to true directly", optionName), -+ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); -+ -+ // Verify that option is disabled when +UseSHA was passed to JVM. -+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be off on unsupported " -+ + "RISCV64CPU even if %s flag set to JVM", -+ optionName, CommandLineOptionTest.prepareBooleanFlag( -+ DigestOptionsBase.USE_SHA_OPTION, true)), -+ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag( -+ DigestOptionsBase.USE_SHA_OPTION, true)); -+ } -+ } -+} -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java -index 2e3e2717a65..7be8af6d035 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java -index 0e06a9e4327..797927b42bf 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java -index c3cdbf37464..be8f7d586c2 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java -index d33bd411f16..d96d5e29c00 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions - * -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java -index 992fa4b5161..b09c873d05d 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8138583 - * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : double abs & neg test -- * @requires os.arch=="aarch64" -+ * @requires os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java -index 3e79b3528b7..fe40ed6f98d 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8138583 - * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : float abs & neg test -- * @requires os.arch=="aarch64" -+ * @requires os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java -index 6603dd224ef..51631910493 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8135028 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java -index d9a0c988004..d999ae423cf 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java -index 722db95aed3..65912a5c7fa 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : float test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java -index f58f21feb23..fffdc2f7565 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : int test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java -index 7774dabcb5f..7afe3560f30 100644 ---- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java -+++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -61,15 +61,17 @@ public class IntrinsicPredicates { - - public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE - = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), -+ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha1" }, null), - new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), - new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null), - // x86 variants - new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), -- new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); -+ new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))))); - - public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE - = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), -+ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha256" }, null), - new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha256" }, null), - new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), - new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), -@@ -79,10 +81,11 @@ public class IntrinsicPredicates { - new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), -- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); -+ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); - - public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE - = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), -+ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha512" }, null), - new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha512" }, null), - new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha512" }, null), - new OrPredicate(new 
CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), -@@ -92,7 +95,7 @@ public class IntrinsicPredicates { - new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), -- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); -+ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); - - public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE - = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE, -diff --git a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -index 57256aa5a32..d4d43b01ae6 100644 ---- a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -+++ b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -112,7 +112,7 @@ public static void main(String args[]) throws Exception { - // It's ok for ARM not to have symbols, because it does not support NMT detail - // when targeting thumb2. It's also ok for Windows not to have symbols, because - // they are only available if the symbols file is included with the build. -- if (Platform.isWindows() || Platform.isARM()) { -+ if (Platform.isWindows() || Platform.isARM() || Platform.isRISCV64()) { - return; // we are done - } - output.reportDiagnosticSummary(); -diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -index 127bb6abcd9..eab19273ad8 100644 ---- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -+++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -239,7 +239,7 @@ private static boolean isAlwaysSupportedPlatform() { - return Platform.isAix() || - (Platform.isLinux() && - (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || -- Platform.isX86())) || -+ Platform.isX86() || Platform.isRISCV64())) || - Platform.isOSX() || - Platform.isSolaris(); - } -diff --git a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java -index 54640b245f8..f0b7aed5ceb 100644 ---- a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java -+++ b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java -@@ -1,5 +1,4 @@ - /* -- * Copyright (c) 2018, Google and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -index 77458554b76..d4bfe31dd7a 100644 ---- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -+++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -@@ -45,7 +45,7 @@ - */ - public class TestMutuallyExclusivePlatformPredicates { - private static enum MethodGroup { -- ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), -+ ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), - BITNESS("is32bit", "is64bit"), - OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), - VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), -diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -index 7990c49a1f6..abeff80e5e8 100644 ---- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -+++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -54,8 +54,8 @@ public static void main(String[] args) throws Throwable { - Events.assertField(event, "hwThreads").atLeast(1); - Events.assertField(event, "cores").atLeast(1); - Events.assertField(event, "sockets").atLeast(1); -- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); -- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); -+ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); -+ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); - } - } - } -diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java -index f4ee0546c70..635cdd18054 100644 ---- a/test/lib/jdk/test/lib/Platform.java -+++ b/test/lib/jdk/test/lib/Platform.java -@@ -202,6 +202,10 @@ public static boolean isARM() { - return isArch("arm.*"); - } - -+ public static boolean isRISCV64() { -+ return isArch("riscv64"); -+ } -+ - public static boolean isPPC() { - return isArch("ppc.*"); - } - -From c51e546566c937354842a27696bd2221087101ae Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 28 Mar 2023 16:30:04 +0800 -Subject: [PATCH 002/140] Drop zgc part - ---- - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 6 +- - .../riscv/gc/z/zBarrierSetAssembler_riscv.cpp | 441 ------------------ - .../riscv/gc/z/zBarrierSetAssembler_riscv.hpp | 101 ---- - src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp | 212 --------- - src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp | 36 -- - src/hotspot/cpu/riscv/gc/z/z_riscv64.ad | 233 --------- - .../cpu/riscv/macroAssembler_riscv.cpp | 46 -- - .../cpu/riscv/macroAssembler_riscv.hpp | 9 - - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 10 - - 9 files changed, 1 insertion(+), 1093 deletions(-) - delete mode 100644 
src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp - delete mode 100644 src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp - delete mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp - delete mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp - delete mode 100644 src/hotspot/cpu/riscv/gc/z/z_riscv64.ad - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 742c2126e60..bba3bd4709c 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -871,11 +871,7 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch - if (UseCompressedOops && !wide) { - __ decode_heap_oop(dest->as_register()); - } -- -- if (!UseZGC) { -- // Load barrier has not yet been applied, so ZGC can't verify the oop here -- __ verify_oop(dest->as_register()); -- } -+ __ verify_oop(dest->as_register()); - } - } - -diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp -deleted file mode 100644 -index 3d3f4d4d774..00000000000 ---- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp -+++ /dev/null -@@ -1,441 +0,0 @@ --/* -- * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#include "precompiled.hpp" --#include "asm/macroAssembler.inline.hpp" --#include "code/codeBlob.hpp" --#include "code/vmreg.inline.hpp" --#include "gc/z/zBarrier.inline.hpp" --#include "gc/z/zBarrierSet.hpp" --#include "gc/z/zBarrierSetAssembler.hpp" --#include "gc/z/zBarrierSetRuntime.hpp" --#include "gc/z/zThreadLocalData.hpp" --#include "memory/resourceArea.hpp" --#include "runtime/sharedRuntime.hpp" --#include "utilities/macros.hpp" --#ifdef COMPILER1 --#include "c1/c1_LIRAssembler.hpp" --#include "c1/c1_MacroAssembler.hpp" --#include "gc/z/c1/zBarrierSetC1.hpp" --#endif // COMPILER1 --#ifdef COMPILER2 --#include "gc/z/c2/zBarrierSetC2.hpp" --#endif // COMPILER2 -- --#ifdef PRODUCT --#define BLOCK_COMMENT(str) /* nothing */ --#else --#define BLOCK_COMMENT(str) __ block_comment(str) --#endif -- --#undef __ --#define __ masm-> -- --void ZBarrierSetAssembler::load_at(MacroAssembler* masm, -- DecoratorSet decorators, -- BasicType type, -- Register dst, -- Address src, -- Register tmp1, -- Register tmp_thread) { -- if (!ZBarrierSet::barrier_needed(decorators, type)) { -- // Barrier not needed -- BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -- return; -- } -- -- assert_different_registers(t1, src.base()); -- assert_different_registers(t0, t1, dst); -- -- Label done; -- -- // Load bad mask into temp register. -- __ la(t0, src); -- __ ld(t1, address_bad_mask_from_thread(xthread)); -- __ ld(dst, Address(t0)); -- -- // Test reference against bad mask. If mask bad, then we need to fix it up. -- __ andr(t1, dst, t1); -- __ beqz(t1, done); -- -- __ enter(); -- -- __ push_call_clobbered_registers_except(RegSet::of(dst)); -- -- if (c_rarg0 != dst) { -- __ mv(c_rarg0, dst); -- } -- -- __ mv(c_rarg1, t0); -- -- __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); -- -- // Make sure dst has the return value. -- if (dst != x10) { -- __ mv(dst, x10); -- } -- -- __ pop_call_clobbered_registers_except(RegSet::of(dst)); -- __ leave(); -- -- __ bind(done); --} -- --#ifdef ASSERT -- --void ZBarrierSetAssembler::store_at(MacroAssembler* masm, -- DecoratorSet decorators, -- BasicType type, -- Address dst, -- Register val, -- Register tmp1, -- Register tmp2) { -- // Verify value -- if (is_reference_type(type)) { -- // Note that src could be noreg, which means we -- // are storing null and can skip verification. -- if (val != noreg) { -- Label done; -- -- // tmp1 and tmp2 are often set to noreg. -- RegSet savedRegs = RegSet::of(t0); -- __ push_reg(savedRegs, sp); -- -- __ ld(t0, address_bad_mask_from_thread(xthread)); -- __ andr(t0, val, t0); -- __ beqz(t0, done); -- __ stop("Verify oop store failed"); -- __ should_not_reach_here(); -- __ bind(done); -- __ pop_reg(savedRegs, sp); -- } -- } -- -- // Store value -- BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); --} -- --#endif // ASSERT -- --void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, -- DecoratorSet decorators, -- bool is_oop, -- Register src, -- Register dst, -- Register count, -- RegSet saved_regs) { -- if (!is_oop) { -- // Barrier not needed -- return; -- } -- -- BLOCK_COMMENT("ZBarrierSetAssembler::arraycopy_prologue {"); -- -- assert_different_registers(src, count, t0); -- -- __ push_reg(saved_regs, sp); -- -- if (count == c_rarg0 && src == c_rarg1) { -- // exactly backwards!! 
-- __ xorr(c_rarg0, c_rarg0, c_rarg1); -- __ xorr(c_rarg1, c_rarg0, c_rarg1); -- __ xorr(c_rarg0, c_rarg0, c_rarg1); -- } else { -- __ mv(c_rarg0, src); -- __ mv(c_rarg1, count); -- } -- -- __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_array_addr(), 2); -- -- __ pop_reg(saved_regs, sp); -- -- BLOCK_COMMENT("} ZBarrierSetAssembler::arraycopy_prologue"); --} -- --void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, -- Register jni_env, -- Register robj, -- Register tmp, -- Label& slowpath) { -- BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_jobject_in_native {"); -- -- assert_different_registers(jni_env, robj, tmp); -- -- // Resolve jobject -- BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, robj, tmp, slowpath); -- -- // Compute the offset of address bad mask from the field of jni_environment -- long int bad_mask_relative_offset = (long int) (in_bytes(ZThreadLocalData::address_bad_mask_offset()) - -- in_bytes(JavaThread::jni_environment_offset())); -- -- // Load the address bad mask -- __ ld(tmp, Address(jni_env, bad_mask_relative_offset)); -- -- // Check address bad mask -- __ andr(tmp, robj, tmp); -- __ bnez(tmp, slowpath); -- -- BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native"); --} -- --#ifdef COMPILER2 -- --OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { -- if (!OptoReg::is_reg(opto_reg)) { -- return OptoReg::Bad; -- } -- -- const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); -- if (vm_reg->is_FloatRegister()) { -- return opto_reg & ~1; -- } -- -- return opto_reg; --} -- --#undef __ --#define __ _masm-> -- --class ZSaveLiveRegisters { --private: -- MacroAssembler* const _masm; -- RegSet _gp_regs; -- FloatRegSet _fp_regs; -- VectorRegSet _vp_regs; -- --public: -- void initialize(ZLoadBarrierStubC2* stub) { -- // Record registers that needs to be saved/restored -- RegMaskIterator rmi(stub->live()); -- while (rmi.has_next()) { -- const OptoReg::Name opto_reg = rmi.next(); -- if (OptoReg::is_reg(opto_reg)) { -- const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); -- if (vm_reg->is_Register()) { -- _gp_regs += RegSet::of(vm_reg->as_Register()); -- } else if (vm_reg->is_FloatRegister()) { -- _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); -- } else if (vm_reg->is_VectorRegister()) { -- const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~(VectorRegisterImpl::max_slots_per_register - 1)); -- _vp_regs += VectorRegSet::of(vm_reg_base->as_VectorRegister()); -- } else { -- fatal("Unknown register type"); -- } -- } -- } -- -- // Remove C-ABI SOE registers, tmp regs and _ref register that will be updated -- _gp_regs -= RegSet::range(x18, x27) + RegSet::of(x2) + RegSet::of(x8, x9) + RegSet::of(x5, stub->ref()); -- } -- -- ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : -- _masm(masm), -- _gp_regs(), -- _fp_regs(), -- _vp_regs() { -- // Figure out what registers to save/restore -- initialize(stub); -- -- // Save registers -- __ push_reg(_gp_regs, sp); -- __ push_fp(_fp_regs, sp); -- __ push_vp(_vp_regs, sp); -- } -- -- ~ZSaveLiveRegisters() { -- // Restore registers -- __ pop_vp(_vp_regs, sp); -- __ pop_fp(_fp_regs, sp); -- __ pop_reg(_gp_regs, sp); -- } --}; -- --class ZSetupArguments { --private: -- MacroAssembler* const _masm; -- const Register _ref; -- const Address _ref_addr; -- --public: -- ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : -- _masm(masm), -- _ref(stub->ref()), -- _ref_addr(stub->ref_addr()) { -- -- // Setup 
arguments -- if (_ref_addr.base() == noreg) { -- // No self healing -- if (_ref != c_rarg0) { -- __ mv(c_rarg0, _ref); -- } -- __ mv(c_rarg1, zr); -- } else { -- // Self healing -- if (_ref == c_rarg0) { -- // _ref is already at correct place -- __ la(c_rarg1, _ref_addr); -- } else if (_ref != c_rarg1) { -- // _ref is in wrong place, but not in c_rarg1, so fix it first -- __ la(c_rarg1, _ref_addr); -- __ mv(c_rarg0, _ref); -- } else if (_ref_addr.base() != c_rarg0) { -- assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0"); -- __ mv(c_rarg0, _ref); -- __ la(c_rarg1, _ref_addr); -- } else { -- assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0"); -- if (_ref_addr.base() == c_rarg0) { -- __ mv(t1, c_rarg1); -- __ la(c_rarg1, _ref_addr); -- __ mv(c_rarg0, t1); -- } else { -- ShouldNotReachHere(); -- } -- } -- } -- } -- -- ~ZSetupArguments() { -- // Transfer result -- if (_ref != x10) { -- __ mv(_ref, x10); -- } -- } --}; -- --#undef __ --#define __ masm-> -- --void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { -- BLOCK_COMMENT("ZLoadBarrierStubC2"); -- -- // Stub entry -- __ bind(*stub->entry()); -- -- { -- ZSaveLiveRegisters save_live_registers(masm, stub); -- ZSetupArguments setup_arguments(masm, stub); -- int32_t offset = 0; -- __ la_patchable(t0, stub->slow_path(), offset); -- __ jalr(x1, t0, offset); -- } -- -- // Stub exit -- __ j(*stub->continuation()); --} -- --#undef __ -- --#endif // COMPILER2 -- --#ifdef COMPILER1 --#undef __ --#define __ ce->masm()-> -- --void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, -- LIR_Opr ref) const { -- assert_different_registers(xthread, ref->as_register(), t1); -- __ ld(t1, address_bad_mask_from_thread(xthread)); -- __ andr(t1, t1, ref->as_register()); --} -- --void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce, -- ZLoadBarrierStubC1* stub) const { -- // Stub entry -- __ bind(*stub->entry()); -- -- Register ref = stub->ref()->as_register(); -- Register ref_addr = noreg; -- Register tmp = noreg; -- -- if (stub->tmp()->is_valid()) { -- // Load address into tmp register -- ce->leal(stub->ref_addr(), stub->tmp()); -- ref_addr = tmp = stub->tmp()->as_pointer_register(); -- } else { -- // Address already in register -- ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register(); -- } -- -- assert_different_registers(ref, ref_addr, noreg); -- -- // Save x10 unless it is the result or tmp register -- // Set up SP to accomodate parameters and maybe x10. 
-- if (ref != x10 && tmp != x10) { -- __ sub(sp, sp, 32); -- __ sd(x10, Address(sp, 16)); -- } else { -- __ sub(sp, sp, 16); -- } -- -- // Setup arguments and call runtime stub -- ce->store_parameter(ref_addr, 1); -- ce->store_parameter(ref, 0); -- -- __ far_call(stub->runtime_stub()); -- -- // Verify result -- __ verify_oop(x10, "Bad oop"); -- -- -- // Move result into place -- if (ref != x10) { -- __ mv(ref, x10); -- } -- -- // Restore x10 unless it is the result or tmp register -- if (ref != x10 && tmp != x10) { -- __ ld(x10, Address(sp, 16)); -- __ add(sp, sp, 32); -- } else { -- __ add(sp, sp, 16); -- } -- -- // Stub exit -- __ j(*stub->continuation()); --} -- --#undef __ --#define __ sasm-> -- --void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, -- DecoratorSet decorators) const { -- __ prologue("zgc_load_barrier stub", false); -- -- __ push_call_clobbered_registers_except(RegSet::of(x10)); -- -- // Setup arguments -- __ load_parameter(0, c_rarg0); -- __ load_parameter(1, c_rarg1); -- -- __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); -- -- __ pop_call_clobbered_registers_except(RegSet::of(x10)); -- -- __ epilogue(); --} -- --#undef __ --#endif // COMPILER1 -diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp -deleted file mode 100644 -index dc07ab635fe..00000000000 ---- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp -+++ /dev/null -@@ -1,101 +0,0 @@ --/* -- * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#ifndef CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP --#define CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP -- --#include "code/vmreg.hpp" --#include "oops/accessDecorators.hpp" --#ifdef COMPILER2 --#include "opto/optoreg.hpp" --#endif // COMPILER2 -- --#ifdef COMPILER1 --class LIR_Assembler; --class LIR_Opr; --class StubAssembler; --class ZLoadBarrierStubC1; --#endif // COMPILER1 -- --#ifdef COMPILER2 --class Node; --class ZLoadBarrierStubC2; --#endif // COMPILER2 -- --class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { --public: -- virtual void load_at(MacroAssembler* masm, -- DecoratorSet decorators, -- BasicType type, -- Register dst, -- Address src, -- Register tmp1, -- Register tmp_thread); -- --#ifdef ASSERT -- virtual void store_at(MacroAssembler* masm, -- DecoratorSet decorators, -- BasicType type, -- Address dst, -- Register val, -- Register tmp1, -- Register tmp2); --#endif // ASSERT -- -- virtual void arraycopy_prologue(MacroAssembler* masm, -- DecoratorSet decorators, -- bool is_oop, -- Register src, -- Register dst, -- Register count, -- RegSet saved_regs); -- -- virtual void try_resolve_jobject_in_native(MacroAssembler* masm, -- Register jni_env, -- Register robj, -- Register tmp, -- Label& slowpath); -- --#ifdef COMPILER1 -- void generate_c1_load_barrier_test(LIR_Assembler* ce, -- LIR_Opr ref) const; -- -- void generate_c1_load_barrier_stub(LIR_Assembler* ce, -- ZLoadBarrierStubC1* stub) const; -- -- void generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, -- DecoratorSet decorators) const; --#endif // COMPILER1 -- --#ifdef COMPILER2 -- OptoReg::Name refine_register(const Node* node, -- OptoReg::Name opto_reg); -- -- void generate_c2_load_barrier_stub(MacroAssembler* masm, -- ZLoadBarrierStubC2* stub) const; --#endif // COMPILER2 --}; -- --#endif // CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp -deleted file mode 100644 -index d14997790af..00000000000 ---- a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp -+++ /dev/null -@@ -1,212 +0,0 @@ --/* -- * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#include "precompiled.hpp" --#include "gc/shared/gcLogPrecious.hpp" --#include "gc/shared/gc_globals.hpp" --#include "gc/z/zGlobals.hpp" --#include "runtime/globals.hpp" --#include "runtime/os.hpp" --#include "utilities/globalDefinitions.hpp" --#include "utilities/powerOfTwo.hpp" -- --#ifdef LINUX --#include --#endif // LINUX -- --// --// The heap can have three different layouts, depending on the max heap size. --// --// Address Space & Pointer Layout 1 --// -------------------------------- --// --// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) --// . . --// . . --// . . --// +--------------------------------+ 0x0000014000000000 (20TB) --// | Remapped View | --// +--------------------------------+ 0x0000010000000000 (16TB) --// . . --// +--------------------------------+ 0x00000c0000000000 (12TB) --// | Marked1 View | --// +--------------------------------+ 0x0000080000000000 (8TB) --// | Marked0 View | --// +--------------------------------+ 0x0000040000000000 (4TB) --// . . --// +--------------------------------+ 0x0000000000000000 --// --// 6 4 4 4 4 --// 3 6 5 2 1 0 --// +--------------------+----+-----------------------------------------------+ --// |00000000 00000000 00|1111|11 11111111 11111111 11111111 11111111 11111111| --// +--------------------+----+-----------------------------------------------+ --// | | | --// | | * 41-0 Object Offset (42-bits, 4TB address space) --// | | --// | * 45-42 Metadata Bits (4-bits) 0001 = Marked0 (Address view 4-8TB) --// | 0010 = Marked1 (Address view 8-12TB) --// | 0100 = Remapped (Address view 16-20TB) --// | 1000 = Finalizable (Address view N/A) --// | --// * 63-46 Fixed (18-bits, always zero) --// --// --// Address Space & Pointer Layout 2 --// -------------------------------- --// --// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) --// . . --// . . --// . . --// +--------------------------------+ 0x0000280000000000 (40TB) --// | Remapped View | --// +--------------------------------+ 0x0000200000000000 (32TB) --// . . --// +--------------------------------+ 0x0000180000000000 (24TB) --// | Marked1 View | --// +--------------------------------+ 0x0000100000000000 (16TB) --// | Marked0 View | --// +--------------------------------+ 0x0000080000000000 (8TB) --// . . --// +--------------------------------+ 0x0000000000000000 --// --// 6 4 4 4 4 --// 3 7 6 3 2 0 --// +------------------+-----+------------------------------------------------+ --// |00000000 00000000 0|1111|111 11111111 11111111 11111111 11111111 11111111| --// +-------------------+----+------------------------------------------------+ --// | | | --// | | * 42-0 Object Offset (43-bits, 8TB address space) --// | | --// | * 46-43 Metadata Bits (4-bits) 0001 = Marked0 (Address view 8-16TB) --// | 0010 = Marked1 (Address view 16-24TB) --// | 0100 = Remapped (Address view 32-40TB) --// | 1000 = Finalizable (Address view N/A) --// | --// * 63-47 Fixed (17-bits, always zero) --// --// --// Address Space & Pointer Layout 3 --// -------------------------------- --// --// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) --// . . --// . . --// . . --// +--------------------------------+ 0x0000500000000000 (80TB) --// | Remapped View | --// +--------------------------------+ 0x0000400000000000 (64TB) --// . . 
--// +--------------------------------+ 0x0000300000000000 (48TB) --// | Marked1 View | --// +--------------------------------+ 0x0000200000000000 (32TB) --// | Marked0 View | --// +--------------------------------+ 0x0000100000000000 (16TB) --// . . --// +--------------------------------+ 0x0000000000000000 --// --// 6 4 4 4 4 --// 3 8 7 4 3 0 --// +------------------+----+-------------------------------------------------+ --// |00000000 00000000 |1111|1111 11111111 11111111 11111111 11111111 11111111| --// +------------------+----+-------------------------------------------------+ --// | | | --// | | * 43-0 Object Offset (44-bits, 16TB address space) --// | | --// | * 47-44 Metadata Bits (4-bits) 0001 = Marked0 (Address view 16-32TB) --// | 0010 = Marked1 (Address view 32-48TB) --// | 0100 = Remapped (Address view 64-80TB) --// | 1000 = Finalizable (Address view N/A) --// | --// * 63-48 Fixed (16-bits, always zero) --// -- --// Default value if probing is not implemented for a certain platform: 128TB --static const size_t DEFAULT_MAX_ADDRESS_BIT = 47; --// Minimum value returned, if probing fails: 64GB --static const size_t MINIMUM_MAX_ADDRESS_BIT = 36; -- --static size_t probe_valid_max_address_bit() { --#ifdef LINUX -- size_t max_address_bit = 0; -- const size_t page_size = os::vm_page_size(); -- for (size_t i = DEFAULT_MAX_ADDRESS_BIT; i > MINIMUM_MAX_ADDRESS_BIT; --i) { -- const uintptr_t base_addr = ((uintptr_t) 1U) << i; -- if (msync((void*)base_addr, page_size, MS_ASYNC) == 0) { -- // msync suceeded, the address is valid, and maybe even already mapped. -- max_address_bit = i; -- break; -- } -- if (errno != ENOMEM) { -- // Some error occured. This should never happen, but msync -- // has some undefined behavior, hence ignore this bit. --#ifdef ASSERT -- fatal("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); --#else // ASSERT -- log_warning_p(gc)("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); --#endif // ASSERT -- continue; -- } -- // Since msync failed with ENOMEM, the page might not be mapped. -- // Try to map it, to see if the address is valid. 
-- void* const result_addr = mmap((void*) base_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); -- if (result_addr != MAP_FAILED) { -- munmap(result_addr, page_size); -- } -- if ((uintptr_t) result_addr == base_addr) { -- // address is valid -- max_address_bit = i; -- break; -- } -- } -- if (max_address_bit == 0) { -- // probing failed, allocate a very high page and take that bit as the maximum -- const uintptr_t high_addr = ((uintptr_t) 1U) << DEFAULT_MAX_ADDRESS_BIT; -- void* const result_addr = mmap((void*) high_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); -- if (result_addr != MAP_FAILED) { -- max_address_bit = BitsPerSize_t - count_leading_zeros((size_t) result_addr) - 1; -- munmap(result_addr, page_size); -- } -- } -- log_info_p(gc, init)("Probing address space for the highest valid bit: " SIZE_FORMAT, max_address_bit); -- return MAX2(max_address_bit, MINIMUM_MAX_ADDRESS_BIT); --#else // LINUX -- return DEFAULT_MAX_ADDRESS_BIT; --#endif // LINUX --} -- --size_t ZPlatformAddressOffsetBits() { -- const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1; -- const size_t max_address_offset_bits = valid_max_address_offset_bits - 3; -- const size_t min_address_offset_bits = max_address_offset_bits - 2; -- const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio); -- const size_t address_offset_bits = log2i_exact(address_offset); -- return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); --} -- --size_t ZPlatformAddressMetadataShift() { -- return ZPlatformAddressOffsetBits(); --} -diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp -deleted file mode 100644 -index f20ecd9b073..00000000000 ---- a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp -+++ /dev/null -@@ -1,36 +0,0 @@ --/* -- * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#ifndef CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP --#define CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP -- --const size_t ZPlatformGranuleSizeShift = 21; // 2MB --const size_t ZPlatformHeapViews = 3; --const size_t ZPlatformCacheLineSize = 64; -- --size_t ZPlatformAddressOffsetBits(); --size_t ZPlatformAddressMetadataShift(); -- --#endif // CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad -deleted file mode 100644 -index 6b6f87814a5..00000000000 ---- a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad -+++ /dev/null -@@ -1,233 +0,0 @@ --// --// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. --// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. --// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. --// --// This code is free software; you can redistribute it and/or modify it --// under the terms of the GNU General Public License version 2 only, as --// published by the Free Software Foundation. --// --// This code is distributed in the hope that it will be useful, but WITHOUT --// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or --// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --// version 2 for more details (a copy is included in the LICENSE file that --// accompanied this code). --// --// You should have received a copy of the GNU General Public License version --// 2 along with this work; if not, write to the Free Software Foundation, --// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. --// --// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA --// or visit www.oracle.com if you need additional information or have any --// questions. 
--// -- --source_hpp %{ -- --#include "gc/shared/gc_globals.hpp" --#include "gc/z/c2/zBarrierSetC2.hpp" --#include "gc/z/zThreadLocalData.hpp" -- --%} -- --source %{ -- --static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, int barrier_data) { -- if (barrier_data == ZLoadBarrierElided) { -- return; -- } -- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data); -- __ ld(tmp, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(tmp, tmp, ref); -- __ bnez(tmp, *stub->entry(), true /* far */); -- __ bind(*stub->continuation()); --} -- --static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { -- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, ZLoadBarrierStrong); -- __ j(*stub->entry()); -- __ bind(*stub->continuation()); --} -- --%} -- --// Load Pointer --instruct zLoadP(iRegPNoSp dst, memory mem) --%{ -- match(Set dst (LoadP mem)); -- predicate(UseZGC && (n->as_Load()->barrier_data() != 0)); -- effect(TEMP dst); -- -- ins_cost(4 * DEFAULT_COST); -- -- format %{ "ld $dst, $mem, #@zLoadP" %} -- -- ins_encode %{ -- const Address ref_addr (as_Register($mem$$base), $mem$$disp); -- __ ld($dst$$Register, ref_addr); -- z_load_barrier(_masm, this, ref_addr, $dst$$Register, t0 /* tmp */, barrier_data()); -- %} -- -- ins_pipe(iload_reg_mem); --%} -- --instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -- match(Set res (CompareAndSwapP mem (Binary oldval newval))); -- match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -- predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(KILL cr, TEMP_DEF res); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapP\n\t" -- "mv $res, $res == $oldval" %} -- -- ins_encode %{ -- Label failed; -- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -- true /* result_as_bool */); -- __ beqz($res$$Register, failed); -- __ mv(t0, $oldval$$Register); -- __ bind(failed); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); -- __ andr(t1, t1, t0); -- __ beqz(t1, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -- true /* result_as_bool */); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -- match(Set res (CompareAndSwapP mem (Binary oldval newval))); -- match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -- predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); -- effect(KILL cr, TEMP_DEF res); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapPAcq\n\t" -- "mv $res, $res == $oldval" %} -- -- ins_encode %{ -- 
Label failed; -- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -- true /* result_as_bool */); -- __ beqz($res$$Register, failed); -- __ mv(t0, $oldval$$Register); -- __ bind(failed); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); -- __ andr(t1, t1, t0); -- __ beqz(t1, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -- true /* result_as_bool */); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ -- match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -- predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(TEMP_DEF res); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangeP" %} -- -- ins_encode %{ -- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(t0, t0, $res$$Register); -- __ beqz(t0, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ -- match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -- predicate(UseZGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(TEMP_DEF res); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangePAcq" %} -- -- ins_encode %{ -- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(t0, t0, $res$$Register); -- __ beqz(t0, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -- match(Set prev (GetAndSetP mem newv)); -- predicate(UseZGC && !needs_acquiring_load_reserved(n) && 
n->as_LoadStore()->barrier_data() != 0); -- effect(TEMP_DEF prev, KILL cr); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "atomic_xchg $prev, $newv, [$mem], #@zGetAndSetP" %} -- -- ins_encode %{ -- __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); -- z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); -- %} -- -- ins_pipe(pipe_serial); --%} -- --instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -- match(Set prev (GetAndSetP mem newv)); -- predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() != 0)); -- effect(TEMP_DEF prev, KILL cr); -- -- ins_cost(VOLATILE_REF_COST); -- -- format %{ "atomic_xchg_acq $prev, $newv, [$mem], #@zGetAndSetPAcq" %} -- -- ins_encode %{ -- __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); -- z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); -- %} -- ins_pipe(pipe_serial); --%} -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 86710295444..9d2cc4cf89f 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1046,52 +1046,6 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { - return count; - } - --#ifdef COMPILER2 --int MacroAssembler::push_vp(unsigned int bitset, Register stack) { -- CompressibleRegion cr(this); -- int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); -- -- // Scan bitset to accumulate register pairs -- unsigned char regs[32]; -- int count = 0; -- for (int reg = 31; reg >= 0; reg--) { -- if ((1U << 31) & bitset) { -- regs[count++] = reg; -- } -- bitset <<= 1; -- } -- -- for (int i = 0; i < count; i++) { -- sub(stack, stack, vector_size_in_bytes); -- vs1r_v(as_VectorRegister(regs[i]), stack); -- } -- -- return count * vector_size_in_bytes / wordSize; --} -- --int MacroAssembler::pop_vp(unsigned int bitset, Register stack) { -- CompressibleRegion cr(this); -- int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); -- -- // Scan bitset to accumulate register pairs -- unsigned char regs[32]; -- int count = 0; -- for (int reg = 31; reg >= 0; reg--) { -- if ((1U << 31) & bitset) { -- regs[count++] = reg; -- } -- bitset <<= 1; -- } -- -- for (int i = count - 1; i >= 0; i--) { -- vl1r_v(as_VectorRegister(regs[i]), stack); -- add(stack, stack, vector_size_in_bytes); -- } -- -- return count * vector_size_in_bytes / wordSize; --} --#endif // COMPILER2 -- - void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { - CompressibleRegion cr(this); - // Push integer registers x7, x10-x17, x28-x31. 
-diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 23e09475be1..b2f0455a1f1 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -484,12 +484,6 @@ class MacroAssembler: public Assembler { - void pop_reg(Register Rd); - int push_reg(unsigned int bitset, Register stack); - int pop_reg(unsigned int bitset, Register stack); -- void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } -- void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } --#ifdef COMPILER2 -- void push_vp(VectorRegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); } -- void pop_vp(VectorRegSet regs, Register stack) { if (regs.bits()) pop_vp(regs.bits(), stack); } --#endif // COMPILER2 - - // Push and pop everything that might be clobbered by a native - // runtime call except t0 and t1. (They are always -@@ -783,9 +777,6 @@ class MacroAssembler: public Assembler { - int push_fp(unsigned int bitset, Register stack); - int pop_fp(unsigned int bitset, Register stack); - -- int push_vp(unsigned int bitset, Register stack); -- int pop_vp(unsigned int bitset, Register stack); -- - // vext - void vmnot_m(VectorRegister vd, VectorRegister vs); - void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -index b3fdd04db1b..b05edf7172c 100644 ---- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -546,16 +546,6 @@ class StubGenerator: public StubCodeGenerator { - // make sure object is 'reasonable' - __ beqz(x10, exit); // if obj is NULL it is OK - --#if INCLUDE_ZGC -- if (UseZGC) { -- // Check if mask is good. 
-- // verifies that ZAddressBadMask & x10 == 0 -- __ ld(c_rarg3, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(c_rarg2, x10, c_rarg3); -- __ bnez(c_rarg2, error); -- } --#endif -- - // Check if the oop is in the right area of memory - __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); - __ andr(c_rarg2, x10, c_rarg3); - -From 7772140df96747b42b13007d0827fc21d2a8b926 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Mon, 27 Mar 2023 15:43:39 +0800 -Subject: [PATCH 003/140] Drop the C2 Vector part - ---- - make/hotspot/gensrc/GensrcAdlc.gmk | 1 - - .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 325 --- - .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 52 - - src/hotspot/cpu/riscv/globals_riscv.hpp | 8 +- - .../cpu/riscv/macroAssembler_riscv.cpp | 22 +- - .../cpu/riscv/macroAssembler_riscv.hpp | 4 +- - src/hotspot/cpu/riscv/matcher_riscv.hpp | 44 +- - src/hotspot/cpu/riscv/register_riscv.cpp | 5 - - src/hotspot/cpu/riscv/register_riscv.hpp | 4 +- - src/hotspot/cpu/riscv/riscv.ad | 476 +--- - src/hotspot/cpu/riscv/riscv_v.ad | 2065 ----------------- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 61 +- - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 110 - - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 4 - - src/hotspot/cpu/riscv/vmreg_riscv.cpp | 10 +- - src/hotspot/cpu/riscv/vmreg_riscv.hpp | 17 +- - 16 files changed, 41 insertions(+), 3167 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/riscv_v.ad - -diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk -index 67f4c6f0574..51137b99db2 100644 ---- a/make/hotspot/gensrc/GensrcAdlc.gmk -+++ b/make/hotspot/gensrc/GensrcAdlc.gmk -@@ -152,7 +152,6 @@ ifeq ($(call check-jvm-feature, compiler2), true) - - ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) - AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ -- $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_v.ad \ - $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_b.ad \ - ))) - endif -diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp -index 27770dc17aa..73f84a724ca 100644 ---- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp -@@ -1319,328 +1319,3 @@ void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRe - - bind(Done); - } -- --void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, -- VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { -- Label loop; -- Assembler::SEW sew = islatin ? 
Assembler::e8 : Assembler::e16; -- -- bind(loop); -- vsetvli(tmp1, cnt, sew, Assembler::m2); -- vlex_v(vr1, a1, sew); -- vlex_v(vr2, a2, sew); -- vmsne_vv(vrs, vr1, vr2); -- vfirst_m(tmp2, vrs); -- bgez(tmp2, DONE); -- sub(cnt, cnt, tmp1); -- if (!islatin) { -- slli(tmp1, tmp1, 1); // get byte counts -- } -- add(a1, a1, tmp1); -- add(a2, a2, tmp1); -- bnez(cnt, loop); -- -- mv(result, true); --} -- --void C2_MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { -- Label DONE; -- Register tmp1 = t0; -- Register tmp2 = t1; -- -- BLOCK_COMMENT("string_equals_v {"); -- -- mv(result, false); -- -- if (elem_size == 2) { -- srli(cnt, cnt, 1); -- } -- -- element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); -- -- bind(DONE); -- BLOCK_COMMENT("} string_equals_v"); --} -- --// used by C2 ClearArray patterns. --// base: Address of a buffer to be zeroed --// cnt: Count in HeapWords --// --// base, cnt, v0, v1 and t0 are clobbered. --void C2_MacroAssembler::clear_array_v(Register base, Register cnt) { -- Label loop; -- -- // making zero words -- vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -- vxor_vv(v0, v0, v0); -- -- bind(loop); -- vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -- vse64_v(v0, base); -- sub(cnt, cnt, t0); -- shadd(base, t0, base, t0, 3); -- bnez(cnt, loop); --} -- --void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, -- Register cnt1, int elem_size) { -- Label DONE; -- Register tmp1 = t0; -- Register tmp2 = t1; -- Register cnt2 = tmp2; -- int length_offset = arrayOopDesc::length_offset_in_bytes(); -- int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); -- -- BLOCK_COMMENT("arrays_equals_v {"); -- -- // if (a1 == a2), return true -- mv(result, true); -- beq(a1, a2, DONE); -- -- mv(result, false); -- // if a1 == null or a2 == null, return false -- beqz(a1, DONE); -- beqz(a2, DONE); -- // if (a1.length != a2.length), return false -- lwu(cnt1, Address(a1, length_offset)); -- lwu(cnt2, Address(a2, length_offset)); -- bne(cnt1, cnt2, DONE); -- -- la(a1, Address(a1, base_offset)); -- la(a2, Address(a2, base_offset)); -- -- element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); -- -- bind(DONE); -- -- BLOCK_COMMENT("} arrays_equals_v"); --} -- --void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, -- Register result, Register tmp1, Register tmp2, int encForm) { -- Label DIFFERENCE, DONE, L, loop; -- bool encLL = encForm == StrIntrinsicNode::LL; -- bool encLU = encForm == StrIntrinsicNode::LU; -- bool encUL = encForm == StrIntrinsicNode::UL; -- -- bool str1_isL = encLL || encLU; -- bool str2_isL = encLL || encUL; -- -- int minCharsInWord = encLL ? wordSize : wordSize / 2; -- -- BLOCK_COMMENT("string_compare {"); -- -- // for Lating strings, 1 byte for 1 character -- // for UTF16 strings, 2 bytes for 1 character -- if (!str1_isL) -- sraiw(cnt1, cnt1, 1); -- if (!str2_isL) -- sraiw(cnt2, cnt2, 1); -- -- // if str1 == str2, return the difference -- // save the minimum of the string lengths in cnt2. -- sub(result, cnt1, cnt2); -- bgt(cnt1, cnt2, L); -- mv(cnt2, cnt1); -- bind(L); -- -- if (str1_isL == str2_isL) { // LL or UU -- element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); -- j(DONE); -- } else { // LU or UL -- Register strL = encLU ? str1 : str2; -- Register strU = encLU ? str2 : str1; -- VectorRegister vstr1 = encLU ? 
v4 : v0; -- VectorRegister vstr2 = encLU ? v0 : v4; -- -- bind(loop); -- vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); -- vle8_v(vstr1, strL); -- vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); -- vzext_vf2(vstr2, vstr1); -- vle16_v(vstr1, strU); -- vmsne_vv(v0, vstr2, vstr1); -- vfirst_m(tmp2, v0); -- bgez(tmp2, DIFFERENCE); -- sub(cnt2, cnt2, tmp1); -- add(strL, strL, tmp1); -- shadd(strU, tmp1, strU, tmp1, 1); -- bnez(cnt2, loop); -- j(DONE); -- } -- bind(DIFFERENCE); -- slli(tmp1, tmp2, 1); -- add(str1, str1, str1_isL ? tmp2 : tmp1); -- add(str2, str2, str2_isL ? tmp2 : tmp1); -- str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); -- str2_isL ? lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); -- sub(result, tmp1, tmp2); -- -- bind(DONE); --} -- --void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { -- Label loop; -- assert_different_registers(src, dst, len, tmp, t0); -- -- BLOCK_COMMENT("byte_array_inflate_v {"); -- bind(loop); -- vsetvli(tmp, len, Assembler::e8, Assembler::m2); -- vle8_v(v2, src); -- vsetvli(t0, len, Assembler::e16, Assembler::m4); -- vzext_vf2(v0, v2); -- vse16_v(v0, dst); -- sub(len, len, tmp); -- add(src, src, tmp); -- shadd(dst, tmp, dst, tmp, 1); -- bnez(len, loop); -- BLOCK_COMMENT("} byte_array_inflate_v"); --} -- --// Compress char[] array to byte[]. --// result: the array length if every element in array can be encoded; 0, otherwise. --void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { -- Label done; -- encode_iso_array_v(src, dst, len, result, tmp); -- beqz(len, done); -- mv(result, zr); -- bind(done); --} -- --// result: the number of elements had been encoded. --void C2_MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { -- Label loop, DIFFERENCE, DONE; -- -- BLOCK_COMMENT("encode_iso_array_v {"); -- mv(result, 0); -- -- bind(loop); -- mv(tmp, 0xff); -- vsetvli(t0, len, Assembler::e16, Assembler::m2); -- vle16_v(v2, src); -- // if element > 0xff, stop -- vmsgtu_vx(v1, v2, tmp); -- vfirst_m(tmp, v1); -- vmsbf_m(v0, v1); -- // compress char to byte -- vsetvli(t0, len, Assembler::e8); -- vncvt_x_x_w(v1, v2, Assembler::v0_t); -- vse8_v(v1, dst, Assembler::v0_t); -- -- bgez(tmp, DIFFERENCE); -- add(result, result, t0); -- add(dst, dst, t0); -- sub(len, len, t0); -- shadd(src, t0, src, t0, 1); -- bnez(len, loop); -- j(DONE); -- -- bind(DIFFERENCE); -- add(result, result, tmp); -- -- bind(DONE); -- BLOCK_COMMENT("} encode_iso_array_v"); --} -- --void C2_MacroAssembler::count_positives_v(Register ary, Register len, Register result, Register tmp) { -- Label LOOP, SET_RESULT, DONE; -- -- BLOCK_COMMENT("count_positives_v {"); -- mv(result, zr); -- -- bind(LOOP); -- vsetvli(t0, len, Assembler::e8, Assembler::m4); -- vle8_v(v0, ary); -- vmslt_vx(v0, v0, zr); -- vfirst_m(tmp, v0); -- bgez(tmp, SET_RESULT); -- // if tmp == -1, all bytes are positive -- add(result, result, t0); -- -- sub(len, len, t0); -- add(ary, ary, t0); -- bnez(len, LOOP); -- j(DONE); -- -- // add remaining positive bytes count -- bind(SET_RESULT); -- add(result, result, tmp); -- -- bind(DONE); -- BLOCK_COMMENT("} count_positives_v"); --} -- --void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1, -- Register ch, Register result, -- Register tmp1, Register tmp2, -- bool isL) { -- mv(result, zr); -- -- Label loop, MATCH, DONE; -- Assembler::SEW sew = isL ? 
Assembler::e8 : Assembler::e16; -- bind(loop); -- vsetvli(tmp1, cnt1, sew, Assembler::m4); -- vlex_v(v0, str1, sew); -- vmseq_vx(v0, v0, ch); -- vfirst_m(tmp2, v0); -- bgez(tmp2, MATCH); // if equal, return index -- -- add(result, result, tmp1); -- sub(cnt1, cnt1, tmp1); -- if (!isL) slli(tmp1, tmp1, 1); -- add(str1, str1, tmp1); -- bnez(cnt1, loop); -- -- mv(result, -1); -- j(DONE); -- -- bind(MATCH); -- add(result, result, tmp2); -- -- bind(DONE); --} -- --// Set dst to NaN if any NaN input. --void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, -- bool is_double, bool is_min) { -- assert_different_registers(dst, src1, src2); -- -- vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); -- -- is_min ? vfmin_vv(dst, src1, src2) -- : vfmax_vv(dst, src1, src2); -- -- vmfne_vv(v0, src1, src1); -- vfadd_vv(dst, src1, src1, Assembler::v0_t); -- vmfne_vv(v0, src2, src2); -- vfadd_vv(dst, src2, src2, Assembler::v0_t); --} -- --// Set dst to NaN if any NaN input. --void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst, -- FloatRegister src1, VectorRegister src2, -- VectorRegister tmp1, VectorRegister tmp2, -- bool is_double, bool is_min) { -- assert_different_registers(src2, tmp1, tmp2); -- -- Label L_done, L_NaN; -- vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); -- vfmv_s_f(tmp2, src1); -- -- is_min ? vfredmin_vs(tmp1, src2, tmp2) -- : vfredmax_vs(tmp1, src2, tmp2); -- -- fsflags(zr); -- // Checking NaNs -- vmflt_vf(tmp2, src2, src1); -- frflags(t0); -- bnez(t0, L_NaN); -- j(L_done); -- -- bind(L_NaN); -- vfmv_s_f(tmp2, src1); -- vfredsum_vs(tmp1, src2, tmp2); -- -- bind(L_done); -- vfmv_f_s(dst, tmp1); --} -diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp -index c71df4c101b..90b6554af02 100644 ---- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp -@@ -28,13 +28,6 @@ - - // C2_MacroAssembler contains high-level macros for C2 - -- private: -- void element_compare(Register r1, Register r2, -- Register result, Register cnt, -- Register tmp1, Register tmp2, -- VectorRegister vr1, VectorRegister vr2, -- VectorRegister vrs, -- bool is_latin, Label& DONE); - public: - - void string_compare(Register str1, Register str2, -@@ -145,49 +138,4 @@ - FloatRegister src1, FloatRegister src2, - bool is_double, bool is_min); - -- // intrinsic methods implemented by rvv instructions -- void string_equals_v(Register r1, Register r2, -- Register result, Register cnt1, -- int elem_size); -- -- void arrays_equals_v(Register r1, Register r2, -- Register result, Register cnt1, -- int elem_size); -- -- void string_compare_v(Register str1, Register str2, -- Register cnt1, Register cnt2, -- Register result, -- Register tmp1, Register tmp2, -- int encForm); -- -- void clear_array_v(Register base, Register cnt); -- -- void byte_array_inflate_v(Register src, Register dst, -- Register len, Register tmp); -- -- void char_array_compress_v(Register src, Register dst, -- Register len, Register result, -- Register tmp); -- -- void encode_iso_array_v(Register src, Register dst, -- Register len, Register result, -- Register tmp); -- -- void count_positives_v(Register ary, Register len, -- Register result, Register tmp); -- -- void string_indexof_char_v(Register str1, Register cnt1, -- Register ch, Register result, -- Register tmp1, Register tmp2, -- bool isL); -- -- void minmax_FD_v(VectorRegister dst, -- VectorRegister src1, 
VectorRegister src2, -- bool is_double, bool is_min); -- -- void reduce_minmax_FD_v(FloatRegister dst, -- FloatRegister src1, VectorRegister src2, -- VectorRegister tmp1, VectorRegister tmp2, -- bool is_double, bool is_min); -- - #endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index cbfc0583883..845064d6cbc 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -90,10 +90,8 @@ define_pd_global(intx, InlineSmallCode, 1000); - "Extend fence.i to fence.i + fence.") \ - product(bool, AvoidUnalignedAccesses, true, \ - "Avoid generating unaligned memory accesses") \ -- product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \ -- product(bool, UseRVB, false, EXPERIMENTAL, "Use RVB instructions") \ -- product(bool, UseRVC, false, EXPERIMENTAL, "Use RVC instructions") \ -- product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \ -- "Use RVV instructions for left/right shift of BigInteger") -+ experimental(bool, UseRVV, false, "Use RVV instructions") \ -+ experimental(bool, UseRVB, false, "Use RVB instructions") \ -+ experimental(bool, UseRVC, false, "Use RVC instructions") - - #endif // CPU_RISCV_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 9d2cc4cf89f..8b8d126f6c9 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1086,7 +1086,7 @@ void MacroAssembler::popa() { - pop_reg(0xffffffe2, sp); - } - --void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { -+void MacroAssembler::push_CPU_state() { - CompressibleRegion cr(this); - // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) - push_reg(0xffffffe0, sp); -@@ -1096,28 +1096,10 @@ void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) - for (int i = 0; i < 32; i++) { - fsd(as_FloatRegister(i), Address(sp, i * wordSize)); - } -- -- // vector registers -- if (save_vectors) { -- sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers); -- vsetvli(t0, x0, Assembler::e64, Assembler::m8); -- for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -- add(t0, sp, vector_size_in_bytes * i); -- vse64_v(as_VectorRegister(i), t0); -- } -- } - } - --void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { -+void MacroAssembler::pop_CPU_state() { - CompressibleRegion cr(this); -- // vector registers -- if (restore_vectors) { -- vsetvli(t0, x0, Assembler::e64, Assembler::m8); -- for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -- vle64_v(as_VectorRegister(i), sp); -- add(sp, sp, vector_size_in_bytes * 8); -- } -- } - - // float registers - for (int i = 0; i < 32; i++) { -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index b2f0455a1f1..b43131514c1 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -501,8 +501,8 @@ class MacroAssembler: public Assembler { - - void pusha(); - void popa(); -- void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0); -- void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0); -+ void push_CPU_state(); -+ void pop_CPU_state(); - - // if heap base register is used - reinit it with the correct value - 
void reinit_heapbase(); -diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp -index 23a75d20502..4c7fabd7240 100644 ---- a/src/hotspot/cpu/riscv/matcher_riscv.hpp -+++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp -@@ -31,16 +31,9 @@ - // false => size gets scaled to BytesPerLong, ok. - static const bool init_array_count_is_in_bytes = false; - -- // Whether this platform implements the scalable vector feature -- static const bool implements_scalable_vector = true; -- -- static const bool supports_scalable_vector() { -- return UseRVV; -- } -- -- // riscv supports misaligned vectors store/load. -+ // riscv doesn't support misaligned vectors store/load on JDK11. - static constexpr bool misaligned_vectors_ok() { -- return true; -+ return false; - } - - // Whether code generation need accurate ConvI2L types. -@@ -53,9 +46,6 @@ - // the cpu only look at the lower 5/6 bits anyway? - static const bool need_masked_shift_count = false; - -- // No support for generic vector operands. -- static const bool supports_generic_vector_operands = false; -- - static constexpr bool isSimpleConstant64(jlong value) { - // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. - // Probably always true, even if a temp register is required. -@@ -127,31 +117,6 @@ - // the relevant 32 bits. - static const bool int_in_long = true; - -- // Does the CPU supports vector variable shift instructions? -- static constexpr bool supports_vector_variable_shifts(void) { -- return false; -- } -- -- // Does the CPU supports vector variable rotate instructions? -- static constexpr bool supports_vector_variable_rotates(void) { -- return false; -- } -- -- // Does the CPU supports vector constant rotate instructions? -- static constexpr bool supports_vector_constant_rotates(int shift) { -- return false; -- } -- -- // Does the CPU supports vector unsigned comparison instructions? -- static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { -- return false; -- } -- -- // Some microarchitectures have mask registers used on vectors -- static const bool has_predicated_vectors(void) { -- return false; -- } -- - // true means we have fast l2f convers - // false means that conversion is done by runtime call - static constexpr bool convL2FSupported(void) { -@@ -161,9 +126,4 @@ - // Implements a variant of EncodeISOArrayNode that encode ASCII only - static const bool supports_encode_ascii_array = false; - -- // Returns pre-selection estimated size of a vector operation. 
-- static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) { -- return 0; -- } -- - #endif // CPU_RISCV_MATCHER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp -index f8116e9df8c..96cf1996a83 100644 ---- a/src/hotspot/cpu/riscv/register_riscv.cpp -+++ b/src/hotspot/cpu/riscv/register_riscv.cpp -@@ -37,11 +37,6 @@ const int ConcreteRegisterImpl::max_fpr = - ConcreteRegisterImpl::max_gpr + - FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; - --const int ConcreteRegisterImpl::max_vpr = -- ConcreteRegisterImpl::max_fpr + -- VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; -- -- - const char* RegisterImpl::name() const { - static const char *const names[number_of_registers] = { - "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", -diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp -index a9200cac647..d697751f55f 100644 ---- a/src/hotspot/cpu/riscv/register_riscv.hpp -+++ b/src/hotspot/cpu/riscv/register_riscv.hpp -@@ -307,14 +307,12 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { - // it's optoregs. - - number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + -- FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + -- VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers) -+ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers) - }; - - // added to make it compile - static const int max_gpr; - static const int max_fpr; -- static const int max_vpr; - }; - - typedef AbstractRegSet RegSet; -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 588887e1d96..85593a942e9 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -226,177 +226,6 @@ reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() ); - reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() ); - reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); - --// ---------------------------- --// Vector Registers --// ---------------------------- -- --// For RVV vector registers, we simply extend vector register size to 4 --// 'logical' slots. This is nominally 128 bits but it actually covers --// all possible 'physical' RVV vector register lengths from 128 ~ 1024 --// bits. The 'physical' RVV vector register length is detected during --// startup, so the register allocator is able to identify the correct --// number of bytes needed for an RVV spill/unspill. 
-- --reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() ); --reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() ); --reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) ); --reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) ); -- --reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() ); --reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() ); --reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) ); --reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) ); -- --reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() ); --reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() ); --reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) ); --reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) ); -- --reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() ); --reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() ); --reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) ); --reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) ); -- --reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() ); --reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() ); --reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) ); --reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) ); -- --reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() ); --reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() ); --reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) ); --reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) ); -- --reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() ); --reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() ); --reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) ); --reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) ); -- --reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() ); --reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() ); --reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) ); --reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) ); -- --reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() ); --reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() ); --reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) ); --reg_def V8_K ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(3) ); -- --reg_def V9 ( SOC, SOC, Op_VecA, 9, v9->as_VMReg() ); --reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() ); --reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) ); --reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) ); -- --reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() ); --reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() ); --reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) ); --reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) ); -- --reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() ); --reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() ); --reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) ); --reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) ); -- --reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() ); --reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() ); --reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) ); --reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) ); -- --reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() ); --reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() ); --reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) ); 
--reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) ); -- --reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() ); --reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() ); --reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) ); --reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) ); -- --reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() ); --reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() ); --reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) ); --reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) ); -- --reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() ); --reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() ); --reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) ); --reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) ); -- --reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() ); --reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() ); --reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) ); --reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) ); -- --reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() ); --reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() ); --reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) ); --reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) ); -- --reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() ); --reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() ); --reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) ); --reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) ); -- --reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() ); --reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() ); --reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) ); --reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) ); -- --reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() ); --reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() ); --reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) ); --reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) ); -- --reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() ); --reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() ); --reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) ); --reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) ); -- --reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() ); --reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() ); --reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) ); --reg_def V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) ); -- --reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() ); --reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() ); --reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) ); --reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) ); -- --reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() ); --reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() ); --reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) ); --reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) ); -- --reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() ); --reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() ); --reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) ); --reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) ); -- --reg_def 
V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() ); --reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() ); --reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) ); --reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) ); -- --reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() ); --reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() ); --reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) ); --reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) ); -- --reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() ); --reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() ); --reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) ); --reg_def V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) ); -- --reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() ); --reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() ); --reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) ); --reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) ); -- --reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() ); --reg_def V31_H ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next() ); --reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) ); --reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) ); -- - // ---------------------------- - // Special Registers - // ---------------------------- -@@ -495,42 +324,7 @@ alloc_class chunk1( - F27, F27_H, - ); - --alloc_class chunk2( -- V0, V0_H, V0_J, V0_K, -- V1, V1_H, V1_J, V1_K, -- V2, V2_H, V2_J, V2_K, -- V3, V3_H, V3_J, V3_K, -- V4, V4_H, V4_J, V4_K, -- V5, V5_H, V5_J, V5_K, -- V6, V6_H, V6_J, V6_K, -- V7, V7_H, V7_J, V7_K, -- V8, V8_H, V8_J, V8_K, -- V9, V9_H, V9_J, V9_K, -- V10, V10_H, V10_J, V10_K, -- V11, V11_H, V11_J, V11_K, -- V12, V12_H, V12_J, V12_K, -- V13, V13_H, V13_J, V13_K, -- V14, V14_H, V14_J, V14_K, -- V15, V15_H, V15_J, V15_K, -- V16, V16_H, V16_J, V16_K, -- V17, V17_H, V17_J, V17_K, -- V18, V18_H, V18_J, V18_K, -- V19, V19_H, V19_J, V19_K, -- V20, V20_H, V20_J, V20_K, -- V21, V21_H, V21_J, V21_K, -- V22, V22_H, V22_J, V22_K, -- V23, V23_H, V23_J, V23_K, -- V24, V24_H, V24_J, V24_K, -- V25, V25_H, V25_J, V25_K, -- V26, V26_H, V26_J, V26_K, -- V27, V27_H, V27_J, V27_K, -- V28, V28_H, V28_J, V28_K, -- V29, V29_H, V29_J, V29_K, -- V30, V30_H, V30_J, V30_K, -- V31, V31_H, V31_J, V31_K, --); -- --alloc_class chunk3(RFLAGS); -+alloc_class chunk2(RFLAGS); - - //----------Architecture Description Register Classes-------------------------- - // Several register classes are automatically defined based upon information in -@@ -826,41 +620,6 @@ reg_class double_reg( - F31, F31_H - ); - --// Class for all RVV vector registers --reg_class vectora_reg( -- V1, V1_H, V1_J, V1_K, -- V2, V2_H, V2_J, V2_K, -- V3, V3_H, V3_J, V3_K, -- V4, V4_H, V4_J, V4_K, -- V5, V5_H, V5_J, V5_K, -- V6, V6_H, V6_J, V6_K, -- V7, V7_H, V7_J, V7_K, -- V8, V8_H, V8_J, V8_K, -- V9, V9_H, V9_J, V9_K, -- V10, V10_H, V10_J, V10_K, -- V11, V11_H, V11_J, V11_K, -- V12, V12_H, V12_J, V12_K, -- V13, V13_H, V13_J, V13_K, -- V14, V14_H, V14_J, V14_K, -- V15, V15_H, V15_J, V15_K, -- V16, V16_H, V16_J, V16_K, -- V17, V17_H, V17_J, V17_K, -- V18, V18_H, V18_J, V18_K, -- V19, V19_H, V19_J, V19_K, -- V20, V20_H, V20_J, V20_K, -- V21, V21_H, V21_J, V21_K, -- V22, V22_H, V22_J, V22_K, -- V23, V23_H, V23_J, V23_K, -- V24, V24_H, V24_J, V24_K, -- V25, V25_H, V25_J, V25_K, -- V26, V26_H, V26_J, V26_K, -- V27, V27_H, V27_J, V27_K, -- V28, V28_H, V28_J, V28_K, -- V29, V29_H, V29_J, 
V29_K, -- V30, V30_H, V30_J, V30_K, -- V31, V31_H, V31_J, V31_K --); -- - // Class for 64 bit register f0 - reg_class f0_reg( - F0, F0_H -@@ -881,31 +640,6 @@ reg_class f3_reg( - F3, F3_H - ); - --// class for vector register v1 --reg_class v1_reg( -- V1, V1_H, V1_J, V1_K --); -- --// class for vector register v2 --reg_class v2_reg( -- V2, V2_H, V2_J, V2_K --); -- --// class for vector register v3 --reg_class v3_reg( -- V3, V3_H, V3_J, V3_K --); -- --// class for vector register v4 --reg_class v4_reg( -- V4, V4_H, V4_J, V4_K --); -- --// class for vector register v5 --reg_class v5_reg( -- V5, V5_H, V5_J, V5_K --); -- - // class for condition codes - reg_class reg_flags(RFLAGS); - %} -@@ -1447,7 +1181,7 @@ const Pipeline * MachEpilogNode::pipeline() const { - - // Figure out which register class each belongs in: rc_int, rc_float or - // rc_stack. --enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack }; -+enum RC { rc_bad, rc_int, rc_float, rc_stack }; - - static enum RC rc_class(OptoReg::Name reg) { - -@@ -1468,13 +1202,7 @@ static enum RC rc_class(OptoReg::Name reg) { - return rc_float; - } - -- // we have 32 vector register * 4 halves -- int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers; -- if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) { -- return rc_vector; -- } -- -- // Between vector regs & stack is the flags regs. -+ // Between float regs & stack is the flags regs. - assert(OptoReg::is_stack(reg), "blow up if spilling flags"); - - return rc_stack; -@@ -1512,30 +1240,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo - int src_offset = ra_->reg2offset(src_lo); - int dst_offset = ra_->reg2offset(dst_lo); - -- if (bottom_type()->isa_vect() != NULL) { -- uint ireg = ideal_reg(); -- if (ireg == Op_VecA && cbuf) { -- C2_MacroAssembler _masm(cbuf); -- Assembler::CompressibleRegion cr(&_masm); -- int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); -- if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { -- // stack to stack -- __ spill_copy_vector_stack_to_stack(src_offset, dst_offset, -- vector_reg_size_in_bytes); -- } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { -- // vpr to stack -- __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo)); -- } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { -- // stack to vpr -- __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); -- } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { -- // vpr to vpr -- __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); -- } else { -- ShouldNotReachHere(); -- } -- } -- } else if (cbuf != NULL) { -+ if (cbuf != NULL) { - C2_MacroAssembler _masm(cbuf); - Assembler::CompressibleRegion cr(&_masm); - switch (src_lo_rc) { -@@ -1619,17 +1324,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo - } else { - st->print("%s", Matcher::regName[dst_lo]); - } -- if (bottom_type()->isa_vect() != NULL) { -- int vsize = 0; -- if (ideal_reg() == Op_VecA) { -- vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; -- } else { -- ShouldNotReachHere(); -- } -- st->print("\t# vector spill size = %d", vsize); -- } else { -- st->print("\t# spill size = %d", is64 ? 64 : 32); -- } -+ st->print("\t# spill size = %d", is64 ? 
64 : 32); - } - - return 0; -@@ -1796,14 +1491,6 @@ const bool Matcher::match_rule_supported(int opcode) { - } - break; - -- case Op_StrCompressedCopy: // fall through -- case Op_StrInflatedCopy: // fall through -- case Op_CountPositives: -- return UseRVV; -- -- case Op_EncodeISOArray: -- return UseRVV && SpecialEncodeISOArray; -- - case Op_PopCountI: - case Op_PopCountL: - return UsePopCountInstruction; -@@ -1821,37 +1508,15 @@ const bool Matcher::match_rule_supported(int opcode) { - } - - // Identify extra cases that we might want to provide match rules for vector nodes and --// other intrinsics guarded with vector length (vlen) and element type (bt). --const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { -- if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -- return false; -- } -- -- return op_vec_supported(opcode); --} -- --const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { -+// other intrinsics guarded with vector length (vlen). -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { - return false; - } - --const RegMask* Matcher::predicate_reg_mask(void) { -- return NULL; --} -- --const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { -- return NULL; --} -- --// Vector calling convention not yet implemented. --const bool Matcher::supports_vector_calling_convention(void) { -+const bool Matcher::has_predicated_vectors(void) { - return false; - } - --OptoRegPair Matcher::vector_return_value(uint ideal_reg) { -- Unimplemented(); -- return OptoRegPair(0, 0); --} -- - // Is this branch offset short enough that a short branch can be used? - // - // NOTE: If the platform does not provide any short branch variants, then -@@ -1877,11 +1542,6 @@ bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { - - // Vector width in bytes. - const int Matcher::vector_width_in_bytes(BasicType bt) { -- if (UseRVV) { -- // The MaxVectorSize should have been set by detecting RVV max vector register size when check UseRVV. -- // MaxVectorSize == VM_Version::_initial_vector_length -- return MaxVectorSize; -- } - return 0; - } - -@@ -1895,34 +1555,10 @@ const int Matcher::min_vector_size(const BasicType bt) { - - // Vector ideal reg. - const uint Matcher::vector_ideal_reg(int len) { -- assert(MaxVectorSize >= len, ""); -- if (UseRVV) { -- return Op_VecA; -- } -- - ShouldNotReachHere(); - return 0; - } - --const int Matcher::scalable_vector_reg_size(const BasicType bt) { -- return Matcher::max_vector_size(bt); --} -- --MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) { -- ShouldNotReachHere(); // generic vector operands not supported -- return NULL; --} -- --bool Matcher::is_reg2reg_move(MachNode* m) { -- ShouldNotReachHere(); // generic vector operands not supported -- return false; --} -- --bool Matcher::is_generic_vector(MachOper* opnd) { -- ShouldNotReachHere(); // generic vector operands not supported -- return false; --} -- - // Return whether or not this register is ever used as an argument. - // This function is used on startup to build the trampoline stubs in - // generateOptoStub. Registers not mentioned will be killed by the VM -@@ -3384,67 +3020,6 @@ operand fRegD() - interface(REG_INTER); - %} - --// Generic vector class. This will be used for --// all vector operands. 
--operand vReg() --%{ -- constraint(ALLOC_IN_RC(vectora_reg)); -- match(VecA); -- op_cost(0); -- format %{ %} -- interface(REG_INTER); --%} -- --operand vReg_V1() --%{ -- constraint(ALLOC_IN_RC(v1_reg)); -- match(VecA); -- match(vReg); -- op_cost(0); -- format %{ %} -- interface(REG_INTER); --%} -- --operand vReg_V2() --%{ -- constraint(ALLOC_IN_RC(v2_reg)); -- match(VecA); -- match(vReg); -- op_cost(0); -- format %{ %} -- interface(REG_INTER); --%} -- --operand vReg_V3() --%{ -- constraint(ALLOC_IN_RC(v3_reg)); -- match(VecA); -- match(vReg); -- op_cost(0); -- format %{ %} -- interface(REG_INTER); --%} -- --operand vReg_V4() --%{ -- constraint(ALLOC_IN_RC(v4_reg)); -- match(VecA); -- match(vReg); -- op_cost(0); -- format %{ %} -- interface(REG_INTER); --%} -- --operand vReg_V5() --%{ -- constraint(ALLOC_IN_RC(v5_reg)); -- match(VecA); -- match(vReg); -- op_cost(0); -- format %{ %} -- interface(REG_INTER); --%} -- - // Java Thread Register - operand javaThread_RegP(iRegP reg) - %{ -@@ -7939,17 +7514,6 @@ instruct castDD(fRegD dst) - ins_pipe(pipe_class_empty); - %} - --instruct castVV(vReg dst) --%{ -- match(Set dst (CastVV dst)); -- -- size(0); -- format %{ "# castVV of $dst" %} -- ins_encode(/* empty encoding */); -- ins_cost(0); -- ins_pipe(pipe_class_empty); --%} -- - // ============================================================================ - // Convert Instructions - -@@ -10076,7 +9640,7 @@ instruct partialSubtypeCheckVsZero(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 su - instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, - iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); -+ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); - match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); - effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - -@@ -10094,7 +9658,7 @@ instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R - instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, - iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); -+ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); - match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); - effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - -@@ -10111,7 +9675,7 @@ instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R - instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, - iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); -+ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); - match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); - effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - -@@ -10129,7 +9693,7 @@ instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ - iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, - rFlagsReg cr) - %{ -- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); -+ predicate(((StrCompNode 
*)n)->encoding() == StrIntrinsicNode::LU); - match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); - effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - -@@ -10275,7 +9839,7 @@ instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, - iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) - %{ - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -- predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); - effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); - -@@ -10294,7 +9858,7 @@ instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, - iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) - %{ - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -- predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); - effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); - -@@ -10310,7 +9874,6 @@ instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, - // clearing of an array - instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) - %{ -- predicate(!UseRVV); - match(Set dummy (ClearArray cnt base)); - effect(USE_KILL cnt, USE_KILL base); - -@@ -10330,8 +9893,7 @@ instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) - - instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) - %{ -- predicate(!UseRVV && (uint64_t)n->in(2)->get_long() -- < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); -+ predicate((uint64_t)n->in(2)->get_long() < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); - match(Set dummy (ClearArray cnt base)); - effect(USE_KILL base, KILL cr); - -@@ -10348,7 +9910,7 @@ instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg - instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, - iRegI_R10 result, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); -+ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); - match(Set result (StrEquals (Binary str1 str2) cnt)); - effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); - -@@ -10364,7 +9926,7 @@ instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, - instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, - iRegI_R10 result, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); -+ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); - match(Set result (StrEquals (Binary str1 str2) cnt)); - effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); - -@@ -10381,7 +9943,7 @@ instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, - iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, - iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); -+ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); - match(Set result (AryEq ary1 ary2)); - effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); - -@@ -10398,7 +9960,7 @@ instruct array_equalsC(iRegP_R11 ary1, 
iRegP_R12 ary2, iRegI_R10 result, - iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, - iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); -+ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); - match(Set result (AryEq ary1 ary2)); - effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); - -diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad -deleted file mode 100644 -index 3828e096b21..00000000000 ---- a/src/hotspot/cpu/riscv/riscv_v.ad -+++ /dev/null -@@ -1,2065 +0,0 @@ --// --// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. --// Copyright (c) 2020, Arm Limited. All rights reserved. --// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. --// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. --// --// This code is free software; you can redistribute it and/or modify it --// under the terms of the GNU General Public License version 2 only, as --// published by the Free Software Foundation. --// --// This code is distributed in the hope that it will be useful, but WITHOUT --// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or --// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --// version 2 for more details (a copy is included in the LICENSE file that --// accompanied this code). --// --// You should have received a copy of the GNU General Public License version --// 2 along with this work; if not, write to the Free Software Foundation, --// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. --// --// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA --// or visit www.oracle.com if you need additional information or have any --// questions. 
--// --// -- --// RISCV Vector Extension Architecture Description File -- --opclass vmemA(indirect); -- --source_hpp %{ -- bool op_vec_supported(int opcode); --%} -- --source %{ -- -- static void loadStore(C2_MacroAssembler masm, bool is_store, -- VectorRegister reg, BasicType bt, Register base) { -- Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -- masm.vsetvli(t0, x0, sew); -- if (is_store) { -- masm.vsex_v(reg, base, sew); -- } else { -- masm.vlex_v(reg, base, sew); -- } -- } -- -- bool op_vec_supported(int opcode) { -- switch (opcode) { -- // No multiply reduction instructions -- case Op_MulReductionVD: -- case Op_MulReductionVF: -- case Op_MulReductionVI: -- case Op_MulReductionVL: -- // Others -- case Op_Extract: -- case Op_ExtractB: -- case Op_ExtractC: -- case Op_ExtractD: -- case Op_ExtractF: -- case Op_ExtractI: -- case Op_ExtractL: -- case Op_ExtractS: -- case Op_ExtractUB: -- // Vector API specific -- case Op_AndReductionV: -- case Op_OrReductionV: -- case Op_XorReductionV: -- case Op_LoadVectorGather: -- case Op_StoreVectorScatter: -- case Op_VectorBlend: -- case Op_VectorCast: -- case Op_VectorCastB2X: -- case Op_VectorCastD2X: -- case Op_VectorCastF2X: -- case Op_VectorCastI2X: -- case Op_VectorCastL2X: -- case Op_VectorCastS2X: -- case Op_VectorInsert: -- case Op_VectorLoadConst: -- case Op_VectorLoadMask: -- case Op_VectorLoadShuffle: -- case Op_VectorMaskCmp: -- case Op_VectorRearrange: -- case Op_VectorReinterpret: -- case Op_VectorStoreMask: -- case Op_VectorTest: -- return false; -- default: -- return UseRVV; -- } -- } -- --%} -- --definitions %{ -- int_def VEC_COST (200, 200); --%} -- --// All VEC instructions -- --// vector load/store --instruct loadV(vReg dst, vmemA mem) %{ -- match(Set dst (LoadVector mem)); -- ins_cost(VEC_COST); -- format %{ "vle $dst, $mem\t#@loadV" %} -- ins_encode %{ -- VectorRegister dst_reg = as_VectorRegister($dst$$reg); -- loadStore(C2_MacroAssembler(&cbuf), false, dst_reg, -- Matcher::vector_element_basic_type(this), as_Register($mem$$base)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct storeV(vReg src, vmemA mem) %{ -- match(Set mem (StoreVector mem src)); -- ins_cost(VEC_COST); -- format %{ "vse $src, $mem\t#@storeV" %} -- ins_encode %{ -- VectorRegister src_reg = as_VectorRegister($src$$reg); -- loadStore(C2_MacroAssembler(&cbuf), true, src_reg, -- Matcher::vector_element_basic_type(this, $src), as_Register($mem$$base)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector abs -- --instruct vabsB(vReg dst, vReg src, vReg tmp) %{ -- match(Set dst (AbsVB src)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t" -- "vmax.vv $dst, $tmp, $src" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vabsS(vReg dst, vReg src, vReg tmp) %{ -- match(Set dst (AbsVS src)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t" -- "vmax.vv $dst, $tmp, $src" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vabsI(vReg dst, vReg src, vReg tmp) %{ -- match(Set dst (AbsVI src)); -- 
ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t" -- "vmax.vv $dst, $tmp, $src" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vabsL(vReg dst, vReg src, vReg tmp) %{ -- match(Set dst (AbsVL src)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsL\n\t" -- "vmax.vv $dst, $tmp, $src" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vabsF(vReg dst, vReg src) %{ -- match(Set dst (AbsVF src)); -- ins_cost(VEC_COST); -- format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vabsD(vReg dst, vReg src) %{ -- match(Set dst (AbsVD src)); -- ins_cost(VEC_COST); -- format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector add -- --instruct vaddB(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (AddVB src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vadd_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vaddS(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (AddVS src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vadd_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vaddI(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (AddVI src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vadd_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vaddL(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (AddVL src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vadd_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vaddF(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (AddVF src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfadd_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vaddD(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst 
(AddVD src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfadd_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector and -- --instruct vand(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (AndV src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vand.vv $dst, $src1, $src2\t#@vand" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vand_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector or -- --instruct vor(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (OrV src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vor.vv $dst, $src1, $src2\t#@vor" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vor_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector xor -- --instruct vxor(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (XorV src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vxor_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector float div -- --instruct vdivF(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (DivVF src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfdiv_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vdivD(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (DivVD src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfdiv_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector integer max/min -- --instruct vmax(vReg dst, vReg src1, vReg src2) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && -- n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); -- match(Set dst (MaxV src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vmax.vv $dst, $src1, $src2\t#@vmax" %} -- ins_encode %{ -- BasicType bt = Matcher::vector_element_basic_type(this); -- Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -- __ vsetvli(t0, x0, sew); -- __ vmax_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmin(vReg dst, vReg src1, vReg src2) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && -- n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); -- match(Set dst (MinV src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vmin.vv $dst, $src1, $src2\t#@vmin" %} -- ins_encode %{ -- BasicType bt = Matcher::vector_element_basic_type(this); -- Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -- __ vsetvli(t0, x0, sew); -- __ vmin_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector float-point max/min -- --instruct 
vmaxF(vReg dst, vReg src1, vReg src2) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -- match(Set dst (MaxV src1 src2)); -- effect(TEMP_DEF dst); -- ins_cost(VEC_COST); -- format %{ "vmaxF $dst, $src1, $src2\t#@vmaxF" %} -- ins_encode %{ -- __ minmax_FD_v(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -- false /* is_double */, false /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmaxD(vReg dst, vReg src1, vReg src2) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -- match(Set dst (MaxV src1 src2)); -- effect(TEMP_DEF dst); -- ins_cost(VEC_COST); -- format %{ "vmaxD $dst, $src1, $src2\t#@vmaxD" %} -- ins_encode %{ -- __ minmax_FD_v(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -- true /* is_double */, false /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vminF(vReg dst, vReg src1, vReg src2) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -- match(Set dst (MinV src1 src2)); -- effect(TEMP_DEF dst); -- ins_cost(VEC_COST); -- format %{ "vminF $dst, $src1, $src2\t#@vminF" %} -- ins_encode %{ -- __ minmax_FD_v(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -- false /* is_double */, true /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vminD(vReg dst, vReg src1, vReg src2) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -- match(Set dst (MinV src1 src2)); -- effect(TEMP_DEF dst); -- ins_cost(VEC_COST); -- format %{ "vminD $dst, $src1, $src2\t#@vminD" %} -- ins_encode %{ -- __ minmax_FD_v(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -- true /* is_double */, true /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector fmla -- --// dst_src1 = dst_src1 + src2 * src3 --instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 + src2 * src3 --instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector fmls -- --// dst_src1 = dst_src1 + -src2 * src3 --// dst_src1 = dst_src1 + src2 * -src3 --instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); -- match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); -- ins_cost(VEC_COST); -- format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 + -src2 * src3 --// dst_src1 = dst_src1 + 
src2 * -src3 --instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); -- match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); -- ins_cost(VEC_COST); -- format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector fnmla -- --// dst_src1 = -dst_src1 + -src2 * src3 --// dst_src1 = -dst_src1 + src2 * -src3 --instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); -- match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); -- ins_cost(VEC_COST); -- format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = -dst_src1 + -src2 * src3 --// dst_src1 = -dst_src1 + src2 * -src3 --instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); -- match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); -- ins_cost(VEC_COST); -- format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector fnmls -- --// dst_src1 = -dst_src1 + src2 * src3 --instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = -dst_src1 + src2 * src3 --instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector mla -- --// dst_src1 = dst_src1 + src2 * src3 --instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 + src2 * src3 --instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ 
vmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 + src2 * src3 --instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 + src2 * src3 --instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector mls -- --// dst_src1 = dst_src1 - src2 * src3 --instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 - src2 * src3 --instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 - src2 * src3 --instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 - src2 * src3 --instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector mul -- --instruct vmulB(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (MulVB src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmulS(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (MulVS src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- 
as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmulI(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (MulVI src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmulL(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (MulVL src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmulF(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (MulVF src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmulD(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (MulVD src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector fneg -- --instruct vnegF(vReg dst, vReg src) %{ -- match(Set dst (NegVF src)); -- ins_cost(VEC_COST); -- format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vnegD(vReg dst, vReg src) %{ -- match(Set dst (NegVD src)); -- ins_cost(VEC_COST); -- format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// popcount vector -- --instruct vpopcountI(iRegINoSp dst, vReg src) %{ -- match(Set dst (PopCountVI src)); -- format %{ "vpopc.m $dst, $src\t#@vpopcountI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector add reduction -- --instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -- match(Set dst (AddReductionVI src1 src2)); -- effect(TEMP tmp); -- ins_cost(VEC_COST); -- format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t" -- "vredsum.vs $tmp, $src2, $tmp\n\t" -- "vmv.x.s $dst, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -- match(Set dst (AddReductionVI src1 src2)); -- effect(TEMP tmp); -- ins_cost(VEC_COST); -- format %{ "vmv.s.x $tmp, 
$src1\t#@reduce_addS\n\t" -- "vredsum.vs $tmp, $src2, $tmp\n\t" -- "vmv.x.s $dst, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -- match(Set dst (AddReductionVI src1 src2)); -- effect(TEMP tmp); -- ins_cost(VEC_COST); -- format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t" -- "vredsum.vs $tmp, $src2, $tmp\n\t" -- "vmv.x.s $dst, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -- match(Set dst (AddReductionVL src1 src2)); -- effect(TEMP tmp); -- ins_cost(VEC_COST); -- format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t" -- "vredsum.vs $tmp, $src2, $tmp\n\t" -- "vmv.x.s $dst, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{ -- match(Set src1_dst (AddReductionVF src1_dst src2)); -- effect(TEMP tmp); -- ins_cost(VEC_COST); -- format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t" -- "vfredosum.vs $tmp, $src2, $tmp\n\t" -- "vfmv.f.s $src1_dst, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -- __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp$$reg)); -- __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{ -- match(Set src1_dst (AddReductionVD src1_dst src2)); -- effect(TEMP tmp); -- ins_cost(VEC_COST); -- format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t" -- "vfredosum.vs $tmp, $src2, $tmp\n\t" -- "vfmv.f.s $src1_dst, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -- __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp$$reg)); -- __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector integer max reduction --instruct vreduce_maxB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -- match(Set dst (MaxReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_maxB $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vredmax_vs(as_VectorRegister($tmp$$reg), 
as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- Label Ldone; -- __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -- __ bind(Ldone); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_maxS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -- match(Set dst (MaxReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_maxS $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- Label Ldone; -- __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -- __ bind(Ldone); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -- match(Set dst (MaxReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_maxI $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -- match(Set dst (MaxReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_maxL $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector integer min reduction --instruct vreduce_minB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -- match(Set dst (MinReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_minB $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- Label Ldone; -- __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -- __ bind(Ldone); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_minS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -- match(Set dst (MinReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_minS $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -- __ 
vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- Label Ldone; -- __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -- __ bind(Ldone); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -- match(Set dst (MinReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_minI $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -- match(Set dst (MinReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_minL $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector float max reduction -- --instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -- match(Set dst (MaxReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -- format %{ "reduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %} -- ins_encode %{ -- __ reduce_minmax_FD_v($dst$$FloatRegister, -- $src1$$FloatRegister, as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -- false /* is_double */, false /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -- match(Set dst (MaxReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -- format %{ "reduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %} -- ins_encode %{ -- __ reduce_minmax_FD_v($dst$$FloatRegister, -- $src1$$FloatRegister, as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -- true /* is_double */, false /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector float min reduction -- --instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -- match(Set dst (MinReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -- format %{ "reduce_minF $dst, $src1, $src2, $tmp1, $tmp2" %} -- ins_encode %{ -- __ reduce_minmax_FD_v($dst$$FloatRegister, -- $src1$$FloatRegister, as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -- false /* is_double */, true /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ -- 
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -- match(Set dst (MinReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -- format %{ "reduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %} -- ins_encode %{ -- __ reduce_minmax_FD_v($dst$$FloatRegister, -- $src1$$FloatRegister, as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -- true /* is_double */, true /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector Math.rint, floor, ceil -- --instruct vroundD(vReg dst, vReg src, immI rmode) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -- match(Set dst (RoundDoubleModeV src rmode)); -- format %{ "vroundD $dst, $src, $rmode" %} -- ins_encode %{ -- switch ($rmode$$constant) { -- case RoundDoubleModeNode::rmode_rint: -- __ csrwi(CSR_FRM, C2_MacroAssembler::rne); -- __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -- break; -- case RoundDoubleModeNode::rmode_floor: -- __ csrwi(CSR_FRM, C2_MacroAssembler::rdn); -- __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -- break; -- case RoundDoubleModeNode::rmode_ceil: -- __ csrwi(CSR_FRM, C2_MacroAssembler::rup); -- __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -- break; -- default: -- ShouldNotReachHere(); -- break; -- } -- %} -- ins_pipe(pipe_slow); --%} -- --// vector replicate -- --instruct replicateB(vReg dst, iRegIorL2I src) %{ -- match(Set dst (ReplicateB src)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.x $dst, $src\t#@replicateB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateS(vReg dst, iRegIorL2I src) %{ -- match(Set dst (ReplicateS src)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.x $dst, $src\t#@replicateS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateI(vReg dst, iRegIorL2I src) %{ -- match(Set dst (ReplicateI src)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.x $dst, $src\t#@replicateI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateL(vReg dst, iRegL src) %{ -- match(Set dst (ReplicateL src)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.x $dst, $src\t#@replicateL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateB_imm5(vReg dst, immI5 con) %{ -- match(Set dst (ReplicateB con)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateS_imm5(vReg dst, immI5 con) %{ -- match(Set dst (ReplicateS con)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateI_imm5(vReg dst, immI5 con) %{ -- match(Set dst (ReplicateI con)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.i 
$dst, $con\t#@replicateI_imm5" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateL_imm5(vReg dst, immL5 con) %{ -- match(Set dst (ReplicateL con)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.i $dst, $con\t#@replicateL_imm5" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateF(vReg dst, fRegF src) %{ -- match(Set dst (ReplicateF src)); -- ins_cost(VEC_COST); -- format %{ "vfmv.v.f $dst, $src\t#@replicateF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateD(vReg dst, fRegD src) %{ -- match(Set dst (ReplicateD src)); -- ins_cost(VEC_COST); -- format %{ "vfmv.v.f $dst, $src\t#@replicateD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector shift -- --instruct vasrB(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (RShiftVB src shift)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst); -- format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t" -- "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t" -- "vmnot.m v0, v0\n\t" -- "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits -- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- BitsPerByte - 1, Assembler::v0_t); -- // otherwise, shift -- __ vmnot_m(v0, v0); -- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg), Assembler::v0_t); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrS(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (RShiftVS src shift)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst); -- format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t" -- "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t" -- "vmnot.m v0, v0\n\t" -- "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits -- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- BitsPerShort - 1, Assembler::v0_t); -- // otherwise, shift -- __ vmnot_m(v0, v0); -- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg), Assembler::v0_t); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrI(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (RShiftVI src shift)); -- ins_cost(VEC_COST); -- format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrL(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (RShiftVL src shift)); -- ins_cost(VEC_COST); -- format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg)); -- %} 
-- ins_pipe(pipe_slow); --%} -- --instruct vlslB(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (LShiftVB src shift)); -- ins_cost(VEC_COST); -- effect( TEMP_DEF dst); -- format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t" -- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -- "vmnot.m v0, v0\n\t" -- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- // if shift > BitsPerByte - 1, clear the element -- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg), Assembler::v0_t); -- // otherwise, shift -- __ vmnot_m(v0, v0); -- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg), Assembler::v0_t); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslS(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (LShiftVS src shift)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst); -- format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t" -- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -- "vmnot.m v0, v0\n\t" -- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- // if shift > BitsPerShort - 1, clear the element -- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg), Assembler::v0_t); -- // otherwise, shift -- __ vmnot_m(v0, v0); -- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg), Assembler::v0_t); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslI(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (LShiftVI src shift)); -- ins_cost(VEC_COST); -- format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslL(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (LShiftVL src shift)); -- ins_cost(VEC_COST); -- format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlsrB(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (URShiftVB src shift)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst); -- format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t" -- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -- "vmnot.m v0, v0, v0\n\t" -- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- // if shift > BitsPerByte - 1, clear the element -- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg), Assembler::v0_t); -- // otherwise, shift -- __ vmnot_m(v0, v0); -- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg), Assembler::v0_t); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlsrS(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (URShiftVS src shift)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst); -- format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t" -- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -- "vmnot.m v0, 
v0\n\t" -- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- // if shift > BitsPerShort - 1, clear the element -- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg), Assembler::v0_t); -- // otherwise, shift -- __ vmnot_m(v0, v0); -- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg), Assembler::v0_t); -- %} -- ins_pipe(pipe_slow); --%} -- -- --instruct vlsrI(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (URShiftVI src shift)); -- ins_cost(VEC_COST); -- format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- -- --instruct vlsrL(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (URShiftVL src shift)); -- ins_cost(VEC_COST); -- format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (RShiftVB src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e8); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- if (con >= BitsPerByte) con = BitsPerByte - 1; -- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (RShiftVS src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e16); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- if (con >= BitsPerShort) con = BitsPerShort - 1; -- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (RShiftVI src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e32); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ -- predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); -- match(Set dst (RShiftVL src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsrl.vi $dst, $src, $shift\t#@vasrL_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e64); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), 
-- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (URShiftVB src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e8); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- if (con >= BitsPerByte) { -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (URShiftVS src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e16); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- if (con >= BitsPerShort) { -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (URShiftVI src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e32); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ -- predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); -- match(Set dst (URShiftVL src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e64); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (LShiftVB src (LShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e8); -- if (con >= BitsPerByte) { -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (LShiftVS src (LShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %} -- ins_encode %{ -- 
uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e16); -- if (con >= BitsPerShort) { -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (LShiftVI src (LShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e32); -- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ -- predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); -- match(Set dst (LShiftVL src (LShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e64); -- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -- match(Set dst (LShiftCntV cnt)); -- match(Set dst (RShiftCntV cnt)); -- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || -- n->bottom_type()->is_vect()->element_basic_type() == T_CHAR); -- match(Set dst (LShiftCntV cnt)); -- match(Set dst (RShiftCntV cnt)); -- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); -- match(Set dst (LShiftCntV cnt)); -- match(Set dst (RShiftCntV cnt)); -- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG); -- match(Set dst (LShiftCntV cnt)); -- match(Set dst (RShiftCntV cnt)); -- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector sqrt -- --instruct vsqrtF(vReg dst, vReg src) %{ -- match(Set dst (SqrtVF src)); -- ins_cost(VEC_COST); -- format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vsqrtD(vReg dst, vReg src) %{ -- match(Set dst (SqrtVD src)); -- ins_cost(VEC_COST); -- format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfsqrt_v(as_VectorRegister($dst$$reg), 
as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector sub -- --instruct vsubB(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (SubVB src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vsubS(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (SubVS src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vsubI(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (SubVI src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vsubL(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (SubVL src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vsubF(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (SubVF src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfsub.vv $dst, $src1, $src2\t@vsubF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vsubD(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (SubVD src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -- iRegI_R10 result, vReg_V1 v1, -- vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) --%{ -- predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); -- match(Set result (StrEquals (Binary str1 str2) cnt)); -- effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); -- -- format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} -- ins_encode %{ -- // Count is in 8-bit bytes; non-Compact chars are 16 bits. -- __ string_equals_v($str1$$Register, $str2$$Register, -- $result$$Register, $cnt$$Register, 1); -- %} -- ins_pipe(pipe_class_memory); --%} -- --instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -- iRegI_R10 result, vReg_V1 v1, -- vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) --%{ -- predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); -- match(Set result (StrEquals (Binary str1 str2) cnt)); -- effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); -- -- format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} -- ins_encode %{ -- // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-- __ string_equals_v($str1$$Register, $str2$$Register, -- $result$$Register, $cnt$$Register, 2); -- %} -- ins_pipe(pipe_class_memory); --%} -- --instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) --%{ -- predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); -- match(Set result (AryEq ary1 ary2)); -- effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); -- -- format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} -- ins_encode %{ -- __ arrays_equals_v($ary1$$Register, $ary2$$Register, -- $result$$Register, $tmp$$Register, 1); -- %} -- ins_pipe(pipe_class_memory); --%} -- --instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) --%{ -- predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); -- match(Set result (AryEq ary1 ary2)); -- effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); -- -- format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} -- ins_encode %{ -- __ arrays_equals_v($ary1$$Register, $ary2$$Register, -- $result$$Register, $tmp$$Register, 2); -- %} -- ins_pipe(pipe_class_memory); --%} -- --instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -- iRegP_R28 tmp1, iRegL_R29 tmp2) --%{ -- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); -- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); -- -- format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} -- ins_encode %{ -- // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-- __ string_compare_v($str1$$Register, $str2$$Register, -- $cnt1$$Register, $cnt2$$Register, $result$$Register, -- $tmp1$$Register, $tmp2$$Register, -- StrIntrinsicNode::UU); -- %} -- ins_pipe(pipe_class_memory); --%} --instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -- iRegP_R28 tmp1, iRegL_R29 tmp2) --%{ -- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); -- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); -- -- format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} -- ins_encode %{ -- __ string_compare_v($str1$$Register, $str2$$Register, -- $cnt1$$Register, $cnt2$$Register, $result$$Register, -- $tmp1$$Register, $tmp2$$Register, -- StrIntrinsicNode::LL); -- %} -- ins_pipe(pipe_class_memory); --%} -- --instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -- iRegP_R28 tmp1, iRegL_R29 tmp2) --%{ -- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); -- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); -- -- format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} -- ins_encode %{ -- __ string_compare_v($str1$$Register, $str2$$Register, -- $cnt1$$Register, $cnt2$$Register, $result$$Register, -- $tmp1$$Register, $tmp2$$Register, -- StrIntrinsicNode::UL); -- %} -- ins_pipe(pipe_class_memory); --%} --instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -- iRegP_R28 tmp1, iRegL_R29 tmp2) --%{ -- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); -- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); -- -- format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} -- ins_encode %{ -- __ string_compare_v($str1$$Register, $str2$$Register, -- $cnt1$$Register, $cnt2$$Register, $result$$Register, -- $tmp1$$Register, $tmp2$$Register, -- StrIntrinsicNode::LU); -- %} -- ins_pipe(pipe_class_memory); --%} -- --// fast byte[] to char[] inflation --instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) --%{ -- predicate(UseRVV); -- match(Set dummy (StrInflatedCopy src (Binary dst len))); -- effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); -- -- format %{ "String Inflate $src,$dst" %} -- ins_encode %{ -- __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); -- %} -- ins_pipe(pipe_class_memory); --%} -- --// encode char[] to byte[] in ISO_8859_1 --instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) --%{ -- predicate(UseRVV); -- match(Set result (EncodeISOArray src (Binary dst len))); -- 
effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -- TEMP v1, TEMP v2, TEMP v3, TEMP tmp); -- -- format %{ "Encode array $src,$dst,$len -> $result" %} -- ins_encode %{ -- __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register, -- $result$$Register, $tmp$$Register); -- %} -- ins_pipe( pipe_class_memory ); --%} -- --// fast char[] to byte[] compression --instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) --%{ -- predicate(UseRVV); -- match(Set result (StrCompressedCopy src (Binary dst len))); -- effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -- TEMP v1, TEMP v2, TEMP v3, TEMP tmp); -- -- format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %} -- ins_encode %{ -- __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register, -- $result$$Register, $tmp$$Register); -- %} -- ins_pipe( pipe_slow ); --%} -- --instruct vcount_positives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result, iRegL tmp) --%{ -- predicate(UseRVV); -- match(Set result (CountPositives ary len)); -- effect(USE_KILL ary, USE_KILL len, TEMP tmp); -- -- format %{ "count positives byte[] $ary, $len -> $result" %} -- ins_encode %{ -- __ count_positives_v($ary$$Register, $len$$Register, $result$$Register, $tmp$$Register); -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -- iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) --%{ -- predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); -- match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -- effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, -- TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); -- -- format %{ "StringUTF16 IndexOf char[] $str1, $cnt1, $ch -> $result" %} -- -- ins_encode %{ -- __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, -- $result$$Register, $tmp1$$Register, $tmp2$$Register, -- false /* isL */); -- %} -- -- ins_pipe(pipe_class_memory); --%} -- --instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -- iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) --%{ -- predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -- match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -- effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, -- TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); -- -- format %{ "StringLatin1 IndexOf char[] $str1, $cnt1, $ch -> $result" %} -- -- ins_encode %{ -- __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, -- $result$$Register, $tmp1$$Register, $tmp2$$Register, -- true /* isL */); -- %} -- -- ins_pipe(pipe_class_memory); --%} -- --// clearing of an array --instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, -- vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3) --%{ -- predicate(UseRVV); -- match(Set dummy (ClearArray cnt base)); -- effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3); -- -- format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} -- -- ins_encode %{ -- __ clear_array_v($base$$Register, $cnt$$Register); -- %} -- -- ins_pipe(pipe_class_memory); --%} -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index f85d4b25a76..4daed17df10 100644 ---- 
a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -80,9 +80,8 @@ class SimpleRuntimeFrame { - }; - - class RegisterSaver { -- const bool _save_vectors; - public: -- RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {} -+ RegisterSaver() {} - ~RegisterSaver() {} - OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); - void restore_live_registers(MacroAssembler* masm); -@@ -91,11 +90,7 @@ class RegisterSaver { - // Used by deoptimization when it is managing result register - // values on its own - // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) -- // |---v0---|<---SP -- // |---v1---|save vectors only in generate_handler_blob -- // |-- .. --| -- // |---v31--|----- -- // |---f0---| -+ // |---f0---|<---SP - // |---f1---| - // | .. | - // |---f31--| -@@ -106,16 +101,8 @@ class RegisterSaver { - // |---x31--| - // |---fp---| - // |---ra---| -- int v0_offset_in_bytes(void) { return 0; } - int f0_offset_in_bytes(void) { -- int f0_offset = 0; --#ifdef COMPILER2 -- if (_save_vectors) { -- f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers * -- BytesPerInt; -- } --#endif -- return f0_offset; -+ return 0; - } - int reserved_slot_offset_in_bytes(void) { - return f0_offset_in_bytes() + -@@ -142,15 +129,6 @@ class RegisterSaver { - }; - - OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { -- int vector_size_in_bytes = 0; -- int vector_size_in_slots = 0; --#ifdef COMPILER2 -- if (_save_vectors) { -- vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE); -- vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT); -- } --#endif -- - assert_cond(masm != NULL && total_frame_words != NULL); - int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); - // OopMap frame size is in compiler stack slots (jint's) not bytes or words -@@ -161,9 +139,9 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ - int frame_size_in_words = frame_size_in_bytes / wordSize; - *total_frame_words = frame_size_in_words; - -- // Save Integer, Float and Vector registers. -+ // Save Integer and Float registers. - __ enter(); -- __ push_CPU_state(_save_vectors, vector_size_in_bytes); -+ __ push_CPU_state(); - - // Set an oopmap for the call site. This oopmap will map all - // oop-registers and debug-info registers as callee-saved. 
This -@@ -176,13 +154,6 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ - - int sp_offset_in_slots = 0; - int step_in_slots = 0; -- if (_save_vectors) { -- step_in_slots = vector_size_in_slots; -- for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -- VectorRegister r = as_VectorRegister(i); -- oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); -- } -- } - - step_in_slots = FloatRegisterImpl::max_slots_per_register; - for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -@@ -207,18 +178,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ - - void RegisterSaver::restore_live_registers(MacroAssembler* masm) { - assert_cond(masm != NULL); --#ifdef COMPILER2 -- __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE)); --#else -- __ pop_CPU_state(_save_vectors); --#endif -+ __ pop_CPU_state(); - __ leave(); - } - - // Is vector's size (in bytes) bigger than a size saved by default? --// riscv does not ovlerlay the floating-point registers on vector registers like aarch64. - bool SharedRuntime::is_wide_vector(int size) { -- return UseRVV; -+ return false; - } - - // The java_calling_convention describes stack locations as ideal slots on -@@ -674,13 +640,6 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm - return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); - } - --int SharedRuntime::vector_calling_convention(VMRegPair *regs, -- uint num_bits, -- uint total_args_passed) { -- Unimplemented(); -- return 0; --} -- - int SharedRuntime::c_calling_convention(const BasicType *sig_bt, - VMRegPair *regs, - VMRegPair *regs2, -@@ -1891,7 +1850,7 @@ void SharedRuntime::generate_deopt_blob() { - OopMap* map = NULL; - OopMapSet *oop_maps = new OopMapSet(); - assert_cond(masm != NULL && oop_maps != NULL); -- RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0); -+ RegisterSaver reg_saver; - - // ------------- - // This code enters when returning to a de-optimized nmethod. A return -@@ -2423,7 +2382,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t - address call_pc = NULL; - int frame_size_in_words = -1; - bool cause_return = (poll_type == POLL_AT_RETURN); -- RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); -+ RegisterSaver reg_saver; - - // Save Integer and Float registers. 
- map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); -@@ -2542,7 +2501,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha - assert_cond(masm != NULL); - - int frame_size_in_words = -1; -- RegisterSaver reg_saver(false /* save_vectors */); -+ RegisterSaver reg_saver; - - OopMapSet *oop_maps = new OopMapSet(); - assert_cond(oop_maps != NULL); -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -index b05edf7172c..39416441bdf 100644 ---- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -2843,111 +2843,6 @@ class StubGenerator: public StubCodeGenerator { - - return entry; - } -- -- // Arguments: -- // -- // Input: -- // c_rarg0 - newArr address -- // c_rarg1 - oldArr address -- // c_rarg2 - newIdx -- // c_rarg3 - shiftCount -- // c_rarg4 - numIter -- // -- address generate_bigIntegerLeftShift() { -- __ align(CodeEntryAlignment); -- StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker"); -- address entry = __ pc(); -- -- Label loop, exit; -- -- Register newArr = c_rarg0; -- Register oldArr = c_rarg1; -- Register newIdx = c_rarg2; -- Register shiftCount = c_rarg3; -- Register numIter = c_rarg4; -- -- Register shiftRevCount = c_rarg5; -- Register oldArrNext = t1; -- -- __ beqz(numIter, exit); -- __ shadd(newArr, newIdx, newArr, t0, 2); -- -- __ li(shiftRevCount, 32); -- __ sub(shiftRevCount, shiftRevCount, shiftCount); -- -- __ bind(loop); -- __ addi(oldArrNext, oldArr, 4); -- __ vsetvli(t0, numIter, Assembler::e32, Assembler::m4); -- __ vle32_v(v0, oldArr); -- __ vle32_v(v4, oldArrNext); -- __ vsll_vx(v0, v0, shiftCount); -- __ vsrl_vx(v4, v4, shiftRevCount); -- __ vor_vv(v0, v0, v4); -- __ vse32_v(v0, newArr); -- __ sub(numIter, numIter, t0); -- __ shadd(oldArr, t0, oldArr, t1, 2); -- __ shadd(newArr, t0, newArr, t1, 2); -- __ bnez(numIter, loop); -- -- __ bind(exit); -- __ ret(); -- -- return entry; -- } -- -- // Arguments: -- // -- // Input: -- // c_rarg0 - newArr address -- // c_rarg1 - oldArr address -- // c_rarg2 - newIdx -- // c_rarg3 - shiftCount -- // c_rarg4 - numIter -- // -- address generate_bigIntegerRightShift() { -- __ align(CodeEntryAlignment); -- StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker"); -- address entry = __ pc(); -- -- Label loop, exit; -- -- Register newArr = c_rarg0; -- Register oldArr = c_rarg1; -- Register newIdx = c_rarg2; -- Register shiftCount = c_rarg3; -- Register numIter = c_rarg4; -- Register idx = numIter; -- -- Register shiftRevCount = c_rarg5; -- Register oldArrNext = c_rarg6; -- Register newArrCur = t0; -- Register oldArrCur = t1; -- -- __ beqz(idx, exit); -- __ shadd(newArr, newIdx, newArr, t0, 2); -- -- __ li(shiftRevCount, 32); -- __ sub(shiftRevCount, shiftRevCount, shiftCount); -- -- __ bind(loop); -- __ vsetvli(t0, idx, Assembler::e32, Assembler::m4); -- __ sub(idx, idx, t0); -- __ shadd(oldArrNext, idx, oldArr, t1, 2); -- __ shadd(newArrCur, idx, newArr, t1, 2); -- __ addi(oldArrCur, oldArrNext, 4); -- __ vle32_v(v0, oldArrCur); -- __ vle32_v(v4, oldArrNext); -- __ vsrl_vx(v0, v0, shiftCount); -- __ vsll_vx(v4, v4, shiftRevCount); -- __ vor_vv(v0, v0, v4); -- __ vse32_v(v0, newArrCur); -- __ bnez(idx, loop); -- -- __ bind(exit); -- __ ret(); -- -- return entry; -- } - #endif - - #ifdef COMPILER2 -@@ -3813,11 +3708,6 @@ class StubGenerator: public StubCodeGenerator { - MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); - 
StubRoutines::_montgomerySquare = g.generate_square(); - } -- -- if (UseRVVForBigIntegerShiftIntrinsics) { -- StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift(); -- StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift(); -- } - #endif - - generate_compare_long_strings(); -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -index 768c7633ca6..2c15a834542 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -167,10 +167,6 @@ void VM_Version::c2_initialize() { - FLAG_SET_DEFAULT(MaxVectorSize, 0); - } - -- if (!UseRVV) { -- FLAG_SET_DEFAULT(UseRVVForBigIntegerShiftIntrinsics, false); -- } -- - if (UseRVV) { - if (FLAG_IS_DEFAULT(MaxVectorSize)) { - MaxVectorSize = _initial_vector_length; -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -index aa7222dc64a..1f6eff96cba 100644 ---- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -@@ -45,16 +45,8 @@ void VMRegImpl::set_regName() { - freg = freg->successor(); - } - -- VectorRegister vreg = ::as_VectorRegister(0); -- for ( ; i < ConcreteRegisterImpl::max_vpr ; ) { -- for (int j = 0 ; j < VectorRegisterImpl::max_slots_per_register ; j++) { -- regName[i++] = reg->name(); -- } -- vreg = vreg->successor(); -- } -- - for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) { -- regName[i] = "NON-GPR-FPR-VPR"; -+ regName[i] = "NON-GPR-FPR"; - } - } - -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp -index 9e611b1f671..6f613a8f11a 100644 ---- a/src/hotspot/cpu/riscv/vmreg_riscv.hpp -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp -@@ -34,10 +34,6 @@ inline bool is_FloatRegister() { - return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; - } - --inline bool is_VectorRegister() { -- return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr; --} -- - inline Register as_Register() { - assert(is_Register(), "must be"); - return ::as_Register(value() / RegisterImpl::max_slots_per_register); -@@ -49,20 +45,9 @@ inline FloatRegister as_FloatRegister() { - FloatRegisterImpl::max_slots_per_register); - } - --inline VectorRegister as_VectorRegister() { -- assert(is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be"); -- return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / -- VectorRegisterImpl::max_slots_per_register); --} -- - inline bool is_concrete() { - assert(is_reg(), "must be"); -- if (is_VectorRegister()) { -- int base = value() - ConcreteRegisterImpl::max_fpr; -- return (base % VectorRegisterImpl::max_slots_per_register) == 0; -- } else { -- return is_even(value()); -- } -+ return is_even(value()); - } - - #endif // CPU_RISCV_VMREG_RISCV_HPP - -From b2011bad9b7404c1f6d0c1aa3176569d7f07d7a9 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Mon, 27 Mar 2023 16:05:55 +0800 -Subject: [PATCH 004/140] Revert: JDK-8253180: ZGC: Implementation of JEP 376: - ZGC: Concurrent Thread-Stack Processing JDK-8220051: Remove global safepoint - code - ---- - src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 14 ------ - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 14 +++--- - .../riscv/c2_safepointPollStubTable_riscv.cpp | 47 ------------------ - src/hotspot/cpu/riscv/frame_riscv.cpp | 9 +--- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 19 +------- - 
.../cpu/riscv/macroAssembler_riscv.cpp | 48 +++++++++++-------- - .../cpu/riscv/macroAssembler_riscv.hpp | 5 +- - src/hotspot/cpu/riscv/riscv.ad | 14 ++---- - src/hotspot/cpu/riscv/vm_version_riscv.hpp | 2 - - 9 files changed, 45 insertions(+), 127 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp - -diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -index dcd0472c540..af7bd067f33 100644 ---- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -@@ -39,20 +39,6 @@ - - #define __ ce->masm()-> - --void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { -- __ bind(_entry); -- InternalAddress safepoint_pc(__ pc() - __ offset() + safepoint_offset()); -- __ code_section()->relocate(__ pc(), safepoint_pc.rspec()); -- __ la(t0, safepoint_pc.target()); -- __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); -- -- assert(SharedRuntime::polling_page_return_handler_blob() != NULL, -- "polling page return stub not created yet"); -- address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); -- -- __ far_jump(RuntimeAddress(stub)); --} -- - void CounterOverflowStub::emit_code(LIR_Assembler* ce) { - __ bind(_entry); - Metadata *m = _method->as_constant_ptr()->as_metadata(); -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index bba3bd4709c..0e383a3c139 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -424,7 +424,7 @@ int LIR_Assembler::emit_deopt_handler() { - return offset; - } - --void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { -+void LIR_Assembler::return_op(LIR_Opr result) { - assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == x10, "word returns are in x10"); - - // Pop the stack before the safepoint code -@@ -434,18 +434,20 @@ void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { - __ reserved_stack_check(); - } - -- code_stub->set_safepoint_offset(__ offset()); -- __ relocate(relocInfo::poll_return_type); -- __ safepoint_poll(*code_stub->entry(), true /* at_return */, false /* acquire */, true /* in_nmethod */); -+ address polling_page(os::get_polling_page()); -+ __ read_polling_page(t0, polling_page, relocInfo::poll_return_type); - __ ret(); - } - - int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { -+ address polling_page(os::get_polling_page()); - guarantee(info != NULL, "Shouldn't be NULL"); -- __ get_polling_page(t0, relocInfo::poll_type); -+ assert(os::is_poll_address(polling_page), "should be"); -+ int32_t offset = 0; -+ __ get_polling_page(t0, polling_page, offset, relocInfo::poll_type); - add_debug_info_for_branch(info); // This isn't just debug info: - // it's the oop map -- __ read_polling_page(t0, 0, relocInfo::poll_type); -+ __ read_polling_page(t0, offset, relocInfo::poll_type); - return __ offset(); - } - -diff --git a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp -deleted file mode 100644 -index a90d9fdc160..00000000000 ---- a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp -+++ /dev/null -@@ -1,47 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
-- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#include "precompiled.hpp" --#include "asm/macroAssembler.hpp" --#include "opto/compile.hpp" --#include "opto/node.hpp" --#include "opto/output.hpp" --#include "runtime/sharedRuntime.hpp" -- --#define __ masm. --void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { -- assert(SharedRuntime::polling_page_return_handler_blob() != NULL, -- "polling page return stub not created yet"); -- address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); -- RuntimeAddress callback_addr(stub); -- -- __ bind(entry->_stub_label); -- InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); -- masm.code_section()->relocate(masm.pc(), safepoint_pc.rspec()); -- __ la(t0, safepoint_pc.target()); -- __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); -- __ far_jump(callback_addr); --} --#undef __ -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 6e38960598a..41e52a4d491 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -39,7 +39,6 @@ - #include "runtime/monitorChunk.hpp" - #include "runtime/os.inline.hpp" - #include "runtime/signature.hpp" --#include "runtime/stackWatermarkSet.hpp" - #include "runtime/stubCodeGenerator.hpp" - #include "runtime/stubRoutines.hpp" - #include "vmreg_riscv.inline.hpp" -@@ -509,13 +508,7 @@ frame frame::sender_raw(RegisterMap* map) const { - } - - frame frame::sender(RegisterMap* map) const { -- frame result = sender_raw(map); -- -- if (map->process_frames()) { -- StackWatermarkSet::on_iteration(map->thread(), result); -- } -- -- return result; -+ return sender_raw(map); - } - - bool frame::is_interpreted_frame_valid(JavaThread* thread) const { -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index d12dcb2af19..9090ad0c058 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -519,7 +519,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, - - if (needs_thread_local_poll) { - NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); -- ld(t1, Address(xthread, JavaThread::polling_word_offset())); -+ ld(t1, Address(xthread, Thread::polling_page_offset())); - andi(t1, t1, SafepointMechanism::poll_bit()); - bnez(t1, safepoint); - } -@@ -591,23 +591,6 @@ void InterpreterMacroAssembler::remove_activation( - // result check if synchronized method - 
Label unlocked, unlock, no_unlock; - -- // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, -- // that would normally not be safe to use. Such bad returns into unsafe territory of -- // the stack, will call InterpreterRuntime::at_unwind. -- Label slow_path; -- Label fast_path; -- safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); -- j(fast_path); -- -- bind(slow_path); -- push(state); -- set_last_Java_frame(esp, fp, (address)pc(), t0); -- super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), xthread); -- reset_last_Java_frame(true); -- pop(state); -- -- bind(fast_path); -- - // get the value of _do_not_unlock_if_synchronized into x13 - const Address do_not_unlock_if_synchronized(xthread, - in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 8b8d126f6c9..4b6136ae36b 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -2122,15 +2122,16 @@ void MacroAssembler::check_klass_subtype(Register sub_klass, - } - - void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { -- ld(t0, Address(xthread, JavaThread::polling_word_offset())); -- if (acquire) { -- membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -- } -- if (at_return) { -- bgtu(in_nmethod ? sp : fp, t0, slow_path, true /* is_far */); -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ ld(t1, Address(xthread, Thread::polling_page_offset())); -+ andi(t0, t1, SafepointMechanism::poll_bit()); -+ bnez(t0, slow_path); - } else { -- andi(t0, t0, SafepointMechanism::poll_bit()); -- bnez(t0, slow_path, true /* is_far */); -+ int32_t offset = 0; -+ la_patchable(t0, ExternalAddress(SafepointSynchronize::address_of_state()), offset); -+ lwu(t0, Address(t0, offset)); -+ assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); -+ bnez(t0, slow_path); - } - } - -@@ -2752,22 +2753,29 @@ void MacroAssembler::reserved_stack_check() { - } - - // Move the address of the polling page into dest. --void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { -- ld(dest, Address(xthread, JavaThread::polling_page_offset())); -+void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ ld(dest, Address(xthread, Thread::polling_page_offset())); -+ } else { -+ uint64_t align = (uint64_t)page & 0xfff; -+ assert(align == 0, "polling page must be page aligned"); -+ la_patchable(dest, Address(page, rtype), offset); -+ } - } - - // Read the polling page. The address of the polling page must - // already be in r. --address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { -- address mark; -- { -- InstructionMark im(this); -- code_section()->relocate(inst_mark(), rtype); -- lwu(zr, Address(r, offset)); -- mark = inst_mark(); -- } -- verify_cross_modify_fence_not_required(); -- return mark; -+void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { -+ int32_t offset = 0; -+ get_polling_page(dest, page, offset, rtype); -+ read_polling_page(dest, offset, rtype); -+} -+ -+// Read the polling page. The address of the polling page must -+// already be in r. 
-+void MacroAssembler::read_polling_page(Register dest, int32_t offset, relocInfo::relocType rtype) { -+ code_section()->relocate(pc(), rtype); -+ lwu(zr, Address(dest, offset)); - } - - void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index b43131514c1..041c696add6 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -625,8 +625,9 @@ class MacroAssembler: public Assembler { - - void reserved_stack_check(); - -- void get_polling_page(Register dest, relocInfo::relocType rtype); -- address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); -+ void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); -+ void read_polling_page(Register r, address page, relocInfo::relocType rtype); -+ void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); - - address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); - address ic_call(address entry, jint method_index = 0); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 85593a942e9..996fa1fb68f 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1132,9 +1132,9 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - } - - if (do_polling() && C->is_method_compilation()) { -- st->print("# test polling word\n\t"); -- st->print("ld t0, [xthread,#%d]\n\t", in_bytes(JavaThread::polling_word_offset())); -- st->print("bgtu sp, t0, #slow_path"); -+ st->print("# touch polling page\n\t"); -+ st->print("li t0, #0x%lx\n\t", p2i(os::get_polling_page())); -+ st->print("ld zr, [t0]"); - } - } - #endif -@@ -1153,13 +1153,7 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - } - - if (do_polling() && C->is_method_compilation()) { -- Label dummy_label; -- Label* code_stub = &dummy_label; -- if (!C->output()->in_scratch_emit_size()) { -- code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); -- } -- __ relocate(relocInfo::poll_return_type); -- __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */); -+ __ read_polling_page(t0, os::get_polling_page(), relocInfo::poll_return_type); - } - } - -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp -index 8e35530359a..7586af01d99 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp -@@ -48,8 +48,6 @@ class VM_Version : public Abstract_VM_Version { - // Initialization - static void initialize(); - -- constexpr static bool supports_stack_watermark_barrier() { return true; } -- - enum Feature_Flag { - #define CPU_FEATURE_FLAGS(decl) \ - decl(I, "i", 8) \ - -From a032c615883fe2bd557baf40f1439cbae55be206 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Mon, 1 May 2023 15:42:09 +0800 -Subject: [PATCH 005/140] Revert JDK-8221554: aarch64 cross-modifying code - ---- - .../cpu/riscv/macroAssembler_riscv.cpp | 22 ------------------- - .../cpu/riscv/macroAssembler_riscv.hpp | 2 -- - 2 files changed, 24 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 4b6136ae36b..269d76ba69e 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -2716,7 
+2716,6 @@ void MacroAssembler::build_frame(int framesize) { - sd(fp, Address(sp, framesize - 2 * wordSize)); - sd(ra, Address(sp, framesize - wordSize)); - if (PreserveFramePointer) { add(fp, sp, framesize); } -- verify_cross_modify_fence_not_required(); - } - - void MacroAssembler::remove_frame(int framesize) { -@@ -3935,26 +3934,5 @@ void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Registe - - void MacroAssembler::safepoint_ifence() { - ifence(); --#ifndef PRODUCT -- if (VerifyCrossModifyFence) { -- // Clear the thread state. -- sb(zr, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); -- } --#endif - } - --#ifndef PRODUCT --void MacroAssembler::verify_cross_modify_fence_not_required() { -- if (VerifyCrossModifyFence) { -- // Check if thread needs a cross modify fence. -- lbu(t0, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); -- Label fence_not_required; -- beqz(t0, fence_not_required); -- // If it does then fail. -- la(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure))); -- mv(c_rarg0, xthread); -- jalr(t0); -- bind(fence_not_required); -- } --} --#endif -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 041c696add6..b59bdadb8bf 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -821,8 +821,6 @@ class MacroAssembler: public Assembler { - void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); - void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); - -- // Check the current thread doesn't need a cross modify fence. 
-- void verify_cross_modify_fence_not_required() PRODUCT_RETURN; - }; - - #ifdef ASSERT - -From fd89cf689015649a5cb850e1e24dcbb7bb59735a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:11:30 +0800 -Subject: [PATCH 006/140] Revert JDK-8242263: Diagnose synchronization on - primitive wrappers - ---- - src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp | 7 ------- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 7 ------- - src/hotspot/cpu/riscv/riscv.ad | 7 ------- - 3 files changed, 21 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index 6f656c8c533..348546a9ea0 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -64,13 +64,6 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr - - null_check_offset = offset(); - -- if (DiagnoseSyncOnValueBasedClasses != 0) { -- load_klass(hdr, obj); -- lwu(hdr, Address(hdr, Klass::access_flags_offset())); -- andi(t0, hdr, JVM_ACC_IS_VALUE_BASED_CLASS); -- bnez(t0, slow_case, true /* is_far */); -- } -- - // Load object header - ld(hdr, Address(obj, hdr_offset)); - // and mark it as unlocked -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 9090ad0c058..8adc7b1320d 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -782,13 +782,6 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) - // Load object pointer into obj_reg c_rarg3 - ld(obj_reg, Address(lock_reg, obj_offset)); - -- if (DiagnoseSyncOnValueBasedClasses != 0) { -- load_klass(tmp, obj_reg); -- lwu(tmp, Address(tmp, Klass::access_flags_offset())); -- andi(tmp, tmp, JVM_ACC_IS_VALUE_BASED_CLASS); -- bnez(tmp, slow_case); -- } -- - // Load (object->mark() | 1) into swap_reg - ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - ori(swap_reg, t0, 1); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 996fa1fb68f..2eefc71dde0 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1965,13 +1965,6 @@ encode %{ - // Load markWord from object into displaced_header. 
- __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); - -- if (DiagnoseSyncOnValueBasedClasses != 0) { -- __ load_klass(flag, oop); -- __ lwu(flag, Address(flag, Klass::access_flags_offset())); -- __ andi(flag, flag, JVM_ACC_IS_VALUE_BASED_CLASS, tmp /* tmp */); -- __ bnez(flag, cont, true /* is_far */); -- } -- - // Check for existing monitor - __ andi(t0, disp_hdr, markWord::monitor_value); - __ bnez(t0, object_has_monitor); - -From feea78c5a227c0a57e57d6d1d544a14682310053 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:24:12 +0800 -Subject: [PATCH 007/140] Revert JDK-8278104: C1 should support the compiler - directive 'BreakAtExecute' - ---- - src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index 348546a9ea0..e5ed25616d6 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -311,7 +311,7 @@ void C1_MacroAssembler::remove_frame(int framesize) { - } - - --void C1_MacroAssembler::verified_entry(bool breakAtEntry) { -+void C1_MacroAssembler::verified_entry() { - // If we have to make this method not-entrant we'll overwrite its - // first instruction with a jump. For this action to be legal we - // must ensure that this first instruction is a J, JAL or NOP. - -From 651009a5783f6f5150b3e75a50069dc841622d33 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 15:57:14 +0800 -Subject: [PATCH 008/140] Revert: JDK-8234562: Move - OrderAccess::release_store*/load_acquire to Atomic JDK-8234736: Harmonize - parameter order in Atomic - store JDK-8234737: Harmonize parameter order in - Atomic - add JDK-8234740: Harmonize parameter order in Atomic - cmpxchg - JDK-8234739: Harmonize parameter order in Atomic - xchg JDK-8236778: Add - Atomic::fetch_and_add - ---- - .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 51 +++++++------------ - .../linux_riscv/orderAccess_linux_riscv.hpp | 31 +++++++---- - 2 files changed, 39 insertions(+), 43 deletions(-) - -diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -index 761da5d743e..9b8b1a31774 100644 ---- a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -+++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -@@ -33,25 +33,31 @@ - // Note that memory_order_conservative requires a full barrier after atomic stores. 
- // See https://patchwork.kernel.org/patch/3575821/ - -+#define FULL_MEM_BARRIER __sync_synchronize() -+#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); -+#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); -+ - template --struct Atomic::PlatformAdd { -- template -- D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { -+struct Atomic::PlatformAdd -+ : Atomic::FetchAndAdd > -+{ -+ template -+ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { - D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE); - FULL_MEM_BARRIER; - return res; - } - -- template -- D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const { -- return add_and_fetch(dest, add_value, order) - add_value; -+ template -+ D fetch_and_add(I add_value, D volatile* dest, atomic_memory_order order) const { -+ return add_and_fetch(add_value, dest, order) - add_value; - } - }; - - template - template --inline T Atomic::PlatformXchg::operator()(T volatile* dest, -- T exchange_value, -+inline T Atomic::PlatformXchg::operator()(T exchange_value, -+ T volatile* dest, - atomic_memory_order order) const { - STATIC_ASSERT(byte_size == sizeof(T)); - T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE); -@@ -62,9 +68,9 @@ inline T Atomic::PlatformXchg::operator()(T volatile* dest, - // __attribute__((unused)) on dest is to get rid of spurious GCC warnings. - template - template --inline T Atomic::PlatformCmpxchg::operator()(T volatile* dest __attribute__((unused)), -+inline T Atomic::PlatformCmpxchg::operator()(T exchange_value, -+ T volatile* dest __attribute__((unused)), - T compare_value, -- T exchange_value, - atomic_memory_order order) const { - STATIC_ASSERT(byte_size == sizeof(T)); - T value = compare_value; -@@ -83,9 +89,9 @@ inline T Atomic::PlatformCmpxchg::operator()(T volatile* dest __attri - - template<> - template --inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__((unused)), -+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, -+ T volatile* dest __attribute__((unused)), - T compare_value, -- T exchange_value, - atomic_memory_order order) const { - STATIC_ASSERT(4 == sizeof(T)); - if (order != memory_order_relaxed) { -@@ -110,25 +116,4 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__(( - return rv; - } - --template --struct Atomic::PlatformOrderedLoad --{ -- template -- T operator()(const volatile T* p) const { T data; __atomic_load(const_cast(p), &data, __ATOMIC_ACQUIRE); return data; } --}; -- --template --struct Atomic::PlatformOrderedStore --{ -- template -- void operator()(volatile T* p, T v) const { __atomic_store(const_cast(p), &v, __ATOMIC_RELEASE); } --}; -- --template --struct Atomic::PlatformOrderedStore --{ -- template -- void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); } --}; -- - #endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -index 1c33dc1e87f..5b5d35553f7 100644 ---- a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -+++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -@@ -37,10 +37,6 @@ inline void OrderAccess::storestore() { release(); } - inline void OrderAccess::loadstore() { acquire(); } - inline void OrderAccess::storeload() { fence(); } - --#define FULL_MEM_BARRIER __sync_synchronize() --#define 
READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); --#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); -- - inline void OrderAccess::acquire() { - READ_MEM_BARRIER; - } -@@ -53,11 +49,26 @@ inline void OrderAccess::fence() { - FULL_MEM_BARRIER; - } - --inline void OrderAccess::cross_modify_fence_impl() { -- asm volatile("fence.i" : : : "memory"); -- if (UseConservativeFence) { -- asm volatile("fence ir, ir" : : : "memory"); -- } --} -+ -+template -+struct OrderAccess::PlatformOrderedLoad -+{ -+ template -+ T operator()(const volatile T* p) const { T data; __atomic_load(const_cast(p), &data, __ATOMIC_ACQUIRE); return data; } -+}; -+ -+template -+struct OrderAccess::PlatformOrderedStore -+{ -+ template -+ void operator()(T v, volatile T* p) const { __atomic_store(const_cast(p), &v, __ATOMIC_RELEASE); } -+}; -+ -+template -+struct OrderAccess::PlatformOrderedStore -+{ -+ template -+ void operator()(T v, volatile T* p) const { release_store(p, v); OrderAccess::fence(); } -+}; - - #endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP - -From b078a2ec01598fbcd99aea61af15d44f9c884aaa Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 21:07:42 +0800 -Subject: [PATCH 009/140] Revert JDK-8229258: Rework markOop and markOopDesc - into a simpler mark word value carrier - ---- - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 4 ++-- - .../shenandoahBarrierSetAssembler_riscv.cpp | 4 ++-- - src/hotspot/cpu/riscv/riscv.ad | 22 +++++++++---------- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 2 +- - 4 files changed, 16 insertions(+), 16 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index e5ed25616d6..2d52343587e 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -67,7 +67,7 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr - // Load object header - ld(hdr, Address(obj, hdr_offset)); - // and mark it as unlocked -- ori(hdr, hdr, markWord::unlocked_value); -+ ori(hdr, hdr, markOopDesc::unlocked_value); - // save unlocked object header into the displaced header location on the stack - sd(hdr, Address(disp_hdr, 0)); - // test if object header is still the same (i.e. 
unlocked), and if so, store the -@@ -141,7 +141,7 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i - void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { - assert_different_registers(obj, klass, len); - // This assumes that all prototype bits fitr in an int32_t -- mv(tmp1, (int32_t)(intptr_t)markWord::prototype().value()); -+ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); - sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); - - if (UseCompressedClassPointers) { // Take care not to kill klass -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -index d0ac6e52436..84e1205bc25 100644 ---- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -@@ -216,9 +216,9 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb - Label done; - __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); - __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 -- __ andi(t2, tmp, markWord::lock_mask_in_place); -+ __ andi(t2, tmp, markOopDesc::lock_mask_in_place); - __ bnez(t2, done); -- __ ori(tmp, tmp, markWord::marked_value); -+ __ ori(tmp, tmp, markOopDesc::marked_value); - __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 - __ bind(done); - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 2eefc71dde0..44ab44dece1 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1966,12 +1966,12 @@ encode %{ - __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); - - // Check for existing monitor -- __ andi(t0, disp_hdr, markWord::monitor_value); -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); - __ bnez(t0, object_has_monitor); - - if (!UseHeavyMonitors) { - // Set tmp to be (markWord of object | UNLOCK_VALUE). -- __ ori(tmp, disp_hdr, markWord::unlocked_value); -+ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); - - // Initialize the box. (Must happen before we update the object mark!) - __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -@@ -1993,7 +1993,7 @@ encode %{ - // Check if the owner is self by comparing the value in the - // markWord of object (disp_hdr) with the stack pointer. - __ sub(disp_hdr, disp_hdr, sp); -- __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place)); -+ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); - // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, - // hence we can store 0 as the displaced header in the box, which indicates that it is a - // recursive lock. -@@ -2012,15 +2012,15 @@ encode %{ - // otherwise m->owner may contain a thread or a stack address. - // - // Try to CAS m->owner from NULL to current thread. 
-- __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markWord::monitor_value)); -+ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); - __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, - Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) - - // Store a non-null value into the box to avoid looking like a re-entrant - // lock. The fast-path monitor unlock code checks for -- // markWord::monitor_value so use markWord::unused_mark which has the -+ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the - // relevant bit set, and also matches ObjectSynchronizer::slow_enter. -- __ mv(tmp, (address)markWord::unused_mark().value()); -+ __ mv(tmp, (address)markOopDesc::unused_mark()); - __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); - - __ beqz(flag, cont); // CAS success means locking succeeded -@@ -2029,9 +2029,9 @@ encode %{ - - // Recursive lock case - __ mv(flag, zr); -- __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); -+ __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); - __ add(tmp, tmp, 1u); -- __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); -+ __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); - - __ bind(cont); - %} -@@ -2060,7 +2060,7 @@ encode %{ - - // Handle existing monitor. - __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); -- __ andi(t0, disp_hdr, markWord::monitor_value); -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); - __ bnez(t0, object_has_monitor); - - if (!UseHeavyMonitors) { -@@ -2080,8 +2080,8 @@ encode %{ - - // Handle existing monitor. - __ bind(object_has_monitor); -- STATIC_ASSERT(markWord::monitor_value <= INT_MAX); -- __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor -+ STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); -+ __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor - __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); - - Label notRecursive; -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index d2a301c6e74..4e388ac4eaa 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -3559,7 +3559,7 @@ void TemplateTable::_new() { - - // initialize object hader only. 
- __ bind(initialize_header); -- __ mv(t0, (intptr_t)markWord::prototype().value()); -+ __ mv(t0, (intptr_t)markOopDesc::prototype()); - __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); - __ store_klass_gap(x10, zr); // zero klass gap for compressed oops - __ store_klass(x10, x14); // store klass last - -From 4b27cd8d4cfa8fb5f0f78aecaebb17d19362f300 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Fri, 31 Mar 2023 16:24:36 +0800 -Subject: [PATCH 010/140] Revert: JDK-8239895: assert(_stack_base != 0LL) - failed: Sanity check JDK-8238988: Rename thread "in stack" methods and add - in_stack_range JDK-8234372: Investigate use of Thread::stack_base() and - queries for "in stack" JDK-8203481: Incorrect constraint for unextended_sp in - frame:safe_for_sender - ---- - src/hotspot/cpu/riscv/frame_riscv.cpp | 32 +++++++++++++++++++-------- - 1 file changed, 23 insertions(+), 9 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 41e52a4d491..8e7babe2c61 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -56,13 +56,21 @@ void RegisterMap::check_location_valid() { - // Profiling/safepoint support - - bool frame::safe_for_sender(JavaThread *thread) { -- address addr_sp = (address)_sp; -- address addr_fp = (address)_fp; -+ address sp = (address)_sp; -+ address fp = (address)_fp; - address unextended_sp = (address)_unextended_sp; - - // consider stack guards when trying to determine "safe" stack pointers -+ static size_t stack_guard_size = os::uses_stack_guard_pages() ? -+ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; -+ size_t usable_stack_size = thread->stack_size() - stack_guard_size; -+ - // sp must be within the usable part of the stack (not in guards) -- if (!thread->is_in_usable_stack(addr_sp)) { -+ bool sp_safe = (sp < thread->stack_base()) && -+ (sp >= thread->stack_base() - usable_stack_size); -+ -+ -+ if (!sp_safe) { - return false; - } - -@@ -79,14 +87,15 @@ bool frame::safe_for_sender(JavaThread *thread) { - // So unextended sp must be within the stack but we need not to check - // that unextended sp >= sp - -- if (!thread->is_in_full_stack_checked(unextended_sp)) { -+ bool unextended_sp_safe = (unextended_sp < thread->stack_base()); -+ -+ if (!unextended_sp_safe) { - return false; - } - - // an fp must be within the stack and above (but not equal) sp - // second evaluation on fp+ is added to handle situation where fp is -1 -- bool fp_safe = thread->is_in_stack_range_excl(addr_fp, addr_sp) && -- thread->is_in_full_stack_checked(addr_fp + (return_addr_offset * sizeof(void*))); -+ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); - - // We know sp/unextended_sp are safe only fp is questionable here - -@@ -147,7 +156,7 @@ bool frame::safe_for_sender(JavaThread *thread) { - - sender_sp = _unextended_sp + _cb->frame_size(); - // Is sender_sp safe? -- if (!thread->is_in_full_stack_checked((address)sender_sp)) { -+ if ((address)sender_sp >= thread->stack_base()) { - return false; - } - -@@ -163,7 +172,10 @@ bool frame::safe_for_sender(JavaThread *thread) { - // fp is always saved in a recognizable place in any code we generate. However - // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp - // is really a frame pointer. 
-- if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { -+ -+ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); -+ -+ if (!saved_fp_safe) { - return false; - } - -@@ -196,7 +208,9 @@ bool frame::safe_for_sender(JavaThread *thread) { - - // Could be the call_stub - if (StubRoutines::returns_to_call_stub(sender_pc)) { -- if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { -+ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); -+ -+ if (!saved_fp_safe) { - return false; - } - - -From d1b463b6c00c75664a49719f75bef8e6408f12df Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Fri, 31 Mar 2023 17:10:33 +0800 -Subject: [PATCH 011/140] Revert JDK-8173585: Intrinsify - StringLatin1.indexOf(char) - ---- - src/hotspot/cpu/riscv/riscv.ad | 19 ------------------- - 1 file changed, 19 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 44ab44dece1..8c7a8ede815 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -9826,7 +9826,6 @@ instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, - iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) - %{ - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -- predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); - effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); - -@@ -9840,24 +9839,6 @@ instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, - %} - - --instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -- iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -- iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) --%{ -- match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -- predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); -- effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, -- TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); -- -- format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} -- ins_encode %{ -- __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -- $result$$Register, $tmp1$$Register, $tmp2$$Register, -- $tmp3$$Register, $tmp4$$Register, true /* isL */); -- %} -- ins_pipe(pipe_class_memory); --%} -- - // clearing of an array - instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) - %{ - -From a0cdf8dfb05dbff34d2ca23104d08ae21b2d7f70 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 12:25:36 +0800 -Subject: [PATCH 012/140] Revert JDK-8281632: riscv: Improve interpreter stack - banging, and change the register t1->t0 - ---- - .../templateInterpreterGenerator_riscv.cpp | 42 ++++--------------- - 1 file changed, 8 insertions(+), 34 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index 6537b2dbd94..76ae6f89e27 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -895,42 +895,16 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract - } - - void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { -- // See more discussion in stackOverflow.hpp. 
-- -- const int shadow_zone_size = checked_cast(StackOverflow::stack_shadow_zone_size()); -+ // Bang each page in the shadow zone. We can't assume it's been done for -+ // an interpreter frame with greater than a page of locals, so each page -+ // needs to be checked. Only true for non-native. -+ const int n_shadow_pages = JavaThread::stack_shadow_zone_size() / os::vm_page_size(); -+ const int start_page = native_call ? n_shadow_pages : 1; - const int page_size = os::vm_page_size(); -- const int n_shadow_pages = shadow_zone_size / page_size; -- --#ifdef ASSERT -- Label L_good_limit; -- __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); -- __ bnez(t0, L_good_limit); -- __ stop("shadow zone safe limit is not initialized"); -- __ bind(L_good_limit); -- -- Label L_good_watermark; -- __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); -- __ bnez(t0, L_good_watermark); -- __ stop("shadow zone growth watermark is not initialized"); -- __ bind(L_good_watermark); --#endif -- -- Label L_done; -- -- __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); -- __ bgtu(sp, t0, L_done); -- -- for (int p = 1; p <= n_shadow_pages; p++) { -- __ bang_stack_with_offset(p * page_size); -+ for (int pages = start_page; pages <= n_shadow_pages ; pages++) { -+ __ sub(t0, sp, pages * page_size); -+ __ sd(zr, Address(t0)); - } -- -- // Record the new watermark, but only if the update is above the safe limit. -- // Otherwise, the next time around the check above would pass the safe limit. -- __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); -- __ bleu(sp, t0, L_done); -- __ sd(sp, Address(xthread, JavaThread::shadow_zone_growth_watermark())); -- -- __ bind(L_done); - } - - // Interpreter stub for calling a native method. (asm interpreter) - -From 8db4bf1400d92c80a0adef8a5ec12adbf595c03f Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 14:56:25 +0800 -Subject: [PATCH 013/140] Port aarch64 style sig handler from - os_linux_aarch64.cpp - ---- - .../os_cpu/linux_riscv/os_linux_riscv.cpp | 224 +++++++++++++----- - 1 file changed, 168 insertions(+), 56 deletions(-) - -diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -index 1f46bbab0a2..db15f1946e2 100644 ---- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -@@ -48,7 +48,6 @@ - #include "runtime/stubRoutines.hpp" - #include "runtime/thread.inline.hpp" - #include "runtime/timer.hpp" --#include "signals_posix.hpp" - #include "utilities/debug.hpp" - #include "utilities/events.hpp" - #include "utilities/vmError.hpp" -@@ -172,31 +171,138 @@ NOINLINE frame os::current_frame() { - } - - // Utility functions --bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, -- ucontext_t* uc, JavaThread* thread) { -+extern "C" JNIEXPORT int -+JVM_handle_linux_signal(int sig, -+ siginfo_t* info, -+ void* ucVoid, -+ int abort_if_unrecognized) { -+ ucontext_t* uc = (ucontext_t*) ucVoid; -+ -+ Thread* t = Thread::current_or_null_safe(); -+ -+ // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away -+ // (no destructors can be run) -+ os::ThreadCrashProtection::check_crash_protection(sig, t); -+ -+ SignalHandlerMark shm(t); -+ -+ // Note: it's not uncommon that JNI code uses signal/sigset to install -+ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, -+ // or have a SIGILL handler when detecting CPU type). 
When that happens, -+ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To -+ // avoid unnecessary crash when libjsig is not preloaded, try handle signals -+ // that do not require siginfo/ucontext first. -+ -+ if (sig == SIGPIPE || sig == SIGXFSZ) { -+ // allow chained handler to go first -+ if (os::Linux::chained_handler(sig, info, ucVoid)) { -+ return true; -+ } else { -+ // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219 -+ return true; -+ } -+ } -+ -+#ifdef CAN_SHOW_REGISTERS_ON_ASSERT -+ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { -+ if (handle_assert_poison_fault(ucVoid, info->si_addr)) { -+ return 1; -+ } -+ } -+#endif -+ -+ JavaThread* thread = NULL; -+ VMThread* vmthread = NULL; -+ if (os::Linux::signal_handlers_are_installed) { -+ if (t != NULL ){ -+ if(t->is_Java_thread()) { -+ thread = (JavaThread *) t; -+ } -+ else if(t->is_VM_thread()){ -+ vmthread = (VMThread *)t; -+ } -+ } -+ } -+ -+ // Handle SafeFetch faults -+ if ((sig == SIGSEGV || sig == SIGBUS) && uc != NULL) { -+ address const pc = (address) os::Linux::ucontext_get_pc(uc); -+ if (pc && StubRoutines::is_safefetch_fault(pc)) { -+ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); -+ return 1; -+ } -+ } - - // decide if this trap can be handled by a stub - address stub = NULL; - -- address pc = NULL; -+ address pc = NULL; - - //%note os_trap_1 - if (info != NULL && uc != NULL && thread != NULL) { -- pc = (address) os::Posix::ucontext_get_pc(uc); -- -- address addr = (address) info->si_addr; -- -- // Make sure the high order byte is sign extended, as it may be masked away by the hardware. -- if ((uintptr_t(addr) & (uintptr_t(1) << 55)) != 0) { -- addr = address(uintptr_t(addr) | (uintptr_t(0xFF) << 56)); -- } -+ pc = (address) os::Linux::ucontext_get_pc(uc); - - // Handle ALL stack overflow variations here - if (sig == SIGSEGV) { -+ address addr = (address) info->si_addr; -+ - // check if fault address is within thread stack -- if (thread->is_in_full_stack(addr)) { -- if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) { -- return true; // continue -+ if (thread->on_local_stack(addr)) { -+ // stack overflow -+ if (thread->in_stack_yellow_reserved_zone(addr)) { -+ if (thread->thread_state() == _thread_in_Java) { -+ if (thread->in_stack_reserved_zone(addr)) { -+ frame fr; -+ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { -+ assert(fr.is_java_frame(), "Must be a Java frame"); -+ frame activation = -+ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); -+ if (activation.sp() != NULL) { -+ thread->disable_stack_reserved_zone(); -+ if (activation.is_interpreted_frame()) { -+ thread->set_reserved_stack_activation((address)( -+ activation.fp() + frame::interpreter_frame_initial_sp_offset)); -+ } else { -+ thread->set_reserved_stack_activation((address)activation.unextended_sp()); -+ } -+ return 1; -+ } -+ } -+ } -+ // Throw a stack overflow exception. Guard pages will be reenabled -+ // while unwinding the stack. -+ thread->disable_stack_yellow_reserved_zone(); -+ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); -+ } else { -+ // Thread was in the vm or native code. Return and try to finish. -+ thread->disable_stack_yellow_reserved_zone(); -+ return 1; -+ } -+ } else if (thread->in_stack_red_zone(addr)) { -+ // Fatal red zone violation. 
Disable the guard pages and fall through -+ // to handle_unexpected_exception way down below. -+ thread->disable_stack_red_zone(); -+ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); -+ -+ // This is a likely cause, but hard to verify. Let's just print -+ // it as a hint. -+ tty->print_raw_cr("Please check if any of your loaded .so files has " -+ "enabled executable stack (see man page execstack(8))"); -+ } else { -+ // Accessing stack address below sp may cause SEGV if current -+ // thread has MAP_GROWSDOWN stack. This should only happen when -+ // current thread was created by user code with MAP_GROWSDOWN flag -+ // and then attached to VM. See notes in os_linux.cpp. -+ if (thread->osthread()->expanding_stack() == 0) { -+ thread->osthread()->set_expanding_stack(); -+ if (os::Linux::manually_expand_stack(thread, addr)) { -+ thread->osthread()->clear_expanding_stack(); -+ return 1; -+ } -+ thread->osthread()->clear_expanding_stack(); -+ } else { -+ fatal("recursive segv. expanding stack."); -+ } - } - } - } -@@ -212,7 +318,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, - tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); - } - stub = SharedRuntime::get_handle_wrong_method_stub(); -- } else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { -+ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { - stub = SharedRuntime::get_poll_stub(pc); - } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { - // BugId 4454115: A read from a MappedByteBuffer can fault -@@ -220,34 +326,12 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, - // Do not crash the VM in such a case. - CodeBlob* cb = CodeCache::find_blob_unsafe(pc); - CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; -- bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); -- if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { -+ if (nm != NULL && nm->has_unsafe_access()) { - address next_pc = pc + NativeCall::instruction_size; -- if (is_unsafe_arraycopy) { -- next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); -- } - stub = SharedRuntime::handle_unsafe_access(thread, next_pc); - } -- } else if (sig == SIGILL && nativeInstruction_at(pc)->is_stop()) { -- // Pull a pointer to the error message out of the instruction -- // stream. -- const uint64_t *detail_msg_ptr -- = (uint64_t*)(pc + NativeInstruction::instruction_size); -- const char *detail_msg = (const char *)*detail_msg_ptr; -- const char *msg = "stop"; -- if (TraceTraps) { -- tty->print_cr("trap: %s: (SIGILL)", msg); -- } -- -- // End life with a fatal error, message and detail message and the context. -- // Note: no need to do any post-processing here (e.g. 
signal chaining) -- va_list va_dummy; -- VMError::report_and_die(thread, uc, NULL, 0, msg, detail_msg, va_dummy); -- va_end(va_dummy); -- -- ShouldNotReachHere(); - } else if (sig == SIGFPE && -- (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { -+ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { - stub = - SharedRuntime:: - continuation_for_implicit_exception(thread, -@@ -255,42 +339,70 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, - SharedRuntime:: - IMPLICIT_DIVIDE_BY_ZERO); - } else if (sig == SIGSEGV && -- MacroAssembler::uses_implicit_null_check((void*)addr)) { -+ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { - // Determination of interpreter/vtable stub/compiled code null exception - stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); - } -- } else if ((thread->thread_state() == _thread_in_vm || -- thread->thread_state() == _thread_in_native) && -- sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ -- thread->doing_unsafe_access()) { -+ } else if (thread->thread_state() == _thread_in_vm && -+ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ -+ thread->doing_unsafe_access()) { - address next_pc = pc + NativeCall::instruction_size; -- if (UnsafeCopyMemory::contains_pc(pc)) { -- next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); -- } - stub = SharedRuntime::handle_unsafe_access(thread, next_pc); - } - - // jni_fast_GetField can trap at certain pc's if a GC kicks in - // and the heap gets shrunk before the field access. - if ((sig == SIGSEGV) || (sig == SIGBUS)) { -- address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); -- if (addr_slow != (address)-1) { -- stub = addr_slow; -+ address addr = JNI_FastGetField::find_slowcase_pc(pc); -+ if (addr != (address)-1) { -+ stub = addr; - } - } -+ -+ // Check to see if we caught the safepoint code in the -+ // process of write protecting the memory serialization page. -+ // It write enables the page immediately after protecting it -+ // so we can just return to retry the write. -+ if ((sig == SIGSEGV) && -+ os::is_memory_serialize_page(thread, (address) info->si_addr)) { -+ // Block current thread until the memory serialize page permission restored. 
-+ os::block_on_serialize_page_trap(); -+ return true; -+ } - } - - if (stub != NULL) { - // save all thread context in case we need to restore it -- if (thread != NULL) { -- thread->set_saved_exception_pc(pc); -- } -+ if (thread != NULL) thread->set_saved_exception_pc(pc); - -- os::Posix::ucontext_set_pc(uc, stub); -+ os::Linux::ucontext_set_pc(uc, stub); - return true; - } - -- return false; // Mute compiler -+ // signal-chaining -+ if (os::Linux::chained_handler(sig, info, ucVoid)) { -+ return true; -+ } -+ -+ if (!abort_if_unrecognized) { -+ // caller wants another chance, so give it to him -+ return false; -+ } -+ -+ if (pc == NULL && uc != NULL) { -+ pc = os::Linux::ucontext_get_pc(uc); -+ } -+ -+ // unmask current signal -+ sigset_t newset; -+ sigemptyset(&newset); -+ sigaddset(&newset, sig); -+ sigprocmask(SIG_UNBLOCK, &newset, NULL); -+ -+ VMError::report_and_die(t, sig, pc, info, ucVoid); -+ -+ ShouldNotReachHere(); -+ return true; // Mute compiler - } - - void os::Linux::init_thread_fpu_state(void) { - -From fd3897410308e2fc54d84a9bd453b1b375e6aace Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 15:24:57 +0800 -Subject: [PATCH 014/140] Revert: JDK-8248240: Remove extendedPC.hpp and - fetch_frame_from_ucontext JDK-8253742: POSIX signal code cleanup - ---- - .../os_cpu/linux_riscv/os_linux_riscv.cpp | 38 ++++++++++++++----- - .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 9 +++-- - 2 files changed, 33 insertions(+), 14 deletions(-) - -diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -index db15f1946e2..4f1c84c60a0 100644 ---- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -@@ -37,6 +37,7 @@ - #include "prims/jniFastGetField.hpp" - #include "prims/jvm_misc.hpp" - #include "runtime/arguments.hpp" -+#include "runtime/extendedPC.hpp" - #include "runtime/frame.inline.hpp" - #include "runtime/interfaceSupport.inline.hpp" - #include "runtime/java.hpp" -@@ -85,11 +86,11 @@ char* os::non_memory_address_word() { - return (char*) -1; - } - --address os::Posix::ucontext_get_pc(const ucontext_t * uc) { -+address os::Linux::ucontext_get_pc(const ucontext_t * uc) { - return (address)uc->uc_mcontext.__gregs[REG_PC]; - } - --void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) { -+void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { - uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc; - } - -@@ -101,13 +102,29 @@ intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { - return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; - } - --address os::fetch_frame_from_context(const void* ucVoid, -- intptr_t** ret_sp, intptr_t** ret_fp) { -- address epc; -+// For Forte Analyzer AsyncGetCallTrace profiling support - thread -+// is currently interrupted by SIGPROF. -+// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal -+// frames. Currently we don't do that on Linux, so it's the same as -+// os::fetch_frame_from_context(). 
-+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, -+ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { -+ -+ assert(thread != NULL, "just checking"); -+ assert(ret_sp != NULL, "just checking"); -+ assert(ret_fp != NULL, "just checking"); -+ -+ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); -+} -+ -+ExtendedPC os::fetch_frame_from_context(const void* ucVoid, -+ intptr_t** ret_sp, intptr_t** ret_fp) { -+ -+ ExtendedPC epc; - const ucontext_t* uc = (const ucontext_t*)ucVoid; - - if (uc != NULL) { -- epc = os::Posix::ucontext_get_pc(uc); -+ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); - if (ret_sp != NULL) { - *ret_sp = os::Linux::ucontext_get_sp(uc); - } -@@ -115,7 +132,8 @@ address os::fetch_frame_from_context(const void* ucVoid, - *ret_fp = os::Linux::ucontext_get_fp(uc); - } - } else { -- epc = NULL; -+ // construct empty ExtendedPC for return value checking -+ epc = ExtendedPC(NULL); - if (ret_sp != NULL) { - *ret_sp = (intptr_t *)NULL; - } -@@ -142,8 +160,8 @@ frame os::fetch_compiled_frame_from_context(const void* ucVoid) { - frame os::fetch_frame_from_context(const void* ucVoid) { - intptr_t* frame_sp = NULL; - intptr_t* frame_fp = NULL; -- address epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); -- return frame(frame_sp, frame_fp, epc); -+ ExtendedPC epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); -+ return frame(frame_sp, frame_fp, epc.pc()); - } - - // By default, gcc always saves frame pointer rfp on this stack. This -@@ -465,7 +483,7 @@ void os::print_context(outputStream *st, const void *context) { - // Note: it may be unsafe to inspect memory near pc. For example, pc may - // point to garbage if entry point in an nmethod is corrupted. Leave - // this at the end, and hope for the best. 
-- address pc = os::Posix::ucontext_get_pc(uc); -+ address pc = os::Linux::ucontext_get_pc(uc); - print_instructions(st, pc, sizeof(char)); - st->cr(); - } -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -index 3100572e9fd..e46efc420b0 100644 ---- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -@@ -61,16 +61,17 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) - - intptr_t* ret_fp = NULL; - intptr_t* ret_sp = NULL; -- address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp); -- if (addr == NULL || ret_sp == NULL ) { -+ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, -+ &ret_sp, &ret_fp); -+ if (addr.pc() == NULL || ret_sp == NULL ) { - // ucontext wasn't useful - return false; - } - -- frame ret_frame(ret_sp, ret_fp, addr); -+ frame ret_frame(ret_sp, ret_fp, addr.pc()); - if (!ret_frame.safe_for_sender(this)) { - #ifdef COMPILER2 -- frame ret_frame2(ret_sp, NULL, addr); -+ frame ret_frame2(ret_sp, NULL, addr.pc()); - if (!ret_frame2.safe_for_sender(this)) { - // nothing else to try if the frame isn't good - return false; - -From 892b40a435ae3f7e85659100ef68db1aeda7ef23 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 15:33:50 +0800 -Subject: [PATCH 015/140] Revert JDK-8263002: Remove CDS MiscCode region - ---- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 10 ++++++++++ - src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp | 6 ++++++ - 2 files changed, 16 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 4daed17df10..21aa3b58c09 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -187,6 +187,16 @@ bool SharedRuntime::is_wide_vector(int size) { - return false; - } - -+size_t SharedRuntime::trampoline_size() { -+ return 6 * NativeInstruction::instruction_size; -+} -+ -+void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, destination, offset); -+ __ jalr(x0, t0, offset); -+} -+ - // The java_calling_convention describes stack locations as ideal slots on - // a frame with no abi restrictions. Since we must observe abi restrictions - // (like the placement of the register window) the slots must be biased by -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -index e46efc420b0..31d9254d8ad 100644 ---- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -@@ -68,6 +68,12 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) - return false; - } - -+ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { -+ // In the middle of a trampoline call. Bail out for safety. -+ // This happens rarely so shouldn't affect profiling. 
-+ return false; -+ } -+ - frame ret_frame(ret_sp, ret_fp, addr.pc()); - if (!ret_frame.safe_for_sender(this)) { - #ifdef COMPILER2 - -From 945a317797bc96efe3f0717ca7258f081b96b14d Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 15:52:43 +0800 -Subject: [PATCH 016/140] Revert JDK-8254158: Consolidate per-platform stack - overflow handling code - ---- - .../os_cpu/linux_riscv/os_linux_riscv.cpp | 52 ++++++++++++++----- - 1 file changed, 40 insertions(+), 12 deletions(-) - -diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -index 4f1c84c60a0..8b772892b4b 100644 ---- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -@@ -145,18 +145,6 @@ ExtendedPC os::fetch_frame_from_context(const void* ucVoid, - return epc; - } - --frame os::fetch_compiled_frame_from_context(const void* ucVoid) { -- const ucontext_t* uc = (const ucontext_t*)ucVoid; -- // In compiled code, the stack banging is performed before RA -- // has been saved in the frame. RA is live, and SP and FP -- // belong to the caller. -- intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); -- intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); -- address frame_pc = (address)(uc->uc_mcontext.__gregs[REG_LR] -- - NativeInstruction::instruction_size); -- return frame(frame_sp, frame_fp, frame_pc); --} -- - frame os::fetch_frame_from_context(const void* ucVoid) { - intptr_t* frame_sp = NULL; - intptr_t* frame_fp = NULL; -@@ -164,6 +152,46 @@ frame os::fetch_frame_from_context(const void* ucVoid) { - return frame(frame_sp, frame_fp, epc.pc()); - } - -+bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { -+ address pc = (address) os::Linux::ucontext_get_pc(uc); -+ if (Interpreter::contains(pc)) { -+ // interpreter performs stack banging after the fixed frame header has -+ // been generated while the compilers perform it before. To maintain -+ // semantic consistency between interpreted and compiled frames, the -+ // method returns the Java sender of the current frame. -+ *fr = os::fetch_frame_from_context(uc); -+ if (!fr->is_first_java_frame()) { -+ assert(fr->safe_for_sender(thread), "Safety check"); -+ *fr = fr->java_sender(); -+ } -+ } else { -+ // more complex code with compiled code -+ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); -+ CodeBlob* cb = CodeCache::find_blob(pc); -+ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { -+ // Not sure where the pc points to, fallback to default -+ // stack overflow handling -+ return false; -+ } else { -+ // In compiled code, the stack banging is performed before RA -+ // has been saved in the frame. RA is live, and SP and FP -+ // belong to the caller. -+ intptr_t* fp = os::Linux::ucontext_get_fp(uc); -+ intptr_t* sp = os::Linux::ucontext_get_sp(uc); -+ address pc = (address)(uc->uc_mcontext.__gregs[REG_LR] -+ - NativeInstruction::instruction_size); -+ *fr = frame(sp, fp, pc); -+ if (!fr->is_java_frame()) { -+ assert(fr->safe_for_sender(thread), "Safety check"); -+ assert(!fr->is_first_frame(), "Safety check"); -+ *fr = fr->java_sender(); -+ } -+ } -+ } -+ assert(fr->is_java_frame(), "Safety check"); -+ return true; -+} -+ - // By default, gcc always saves frame pointer rfp on this stack. This - // may get turned off by -fomit-frame-pointer. 
- frame os::get_sender_for_C_frame(frame* fr) { - -From c1a03e0a376cc2c8748d83d66b576b66ee2e6962 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 16:14:19 +0800 -Subject: [PATCH 017/140] Revert JDK-8202579: Revisit VM_Version and - VM_Version_ext for overlap and consolidation - ---- - .../cpu/riscv/vm_version_ext_riscv.cpp | 87 +++++++++++++++++++ - .../cpu/riscv/vm_version_ext_riscv.hpp | 55 ++++++++++++ - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 14 --- - 3 files changed, 142 insertions(+), 14 deletions(-) - create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp - -diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp -new file mode 100644 -index 00000000000..6bdce51506e ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp -@@ -0,0 +1,87 @@ -+/* -+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "memory/allocation.hpp" -+#include "memory/allocation.inline.hpp" -+#include "runtime/os.inline.hpp" -+#include "vm_version_ext_riscv.hpp" -+ -+// VM_Version_Ext statics -+int VM_Version_Ext::_no_of_threads = 0; -+int VM_Version_Ext::_no_of_cores = 0; -+int VM_Version_Ext::_no_of_sockets = 0; -+bool VM_Version_Ext::_initialized = false; -+char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; -+char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; -+ -+void VM_Version_Ext::initialize_cpu_information(void) { -+ // do nothing if cpu info has been initialized -+ if (_initialized) { -+ return; -+ } -+ -+ _no_of_cores = os::processor_count(); -+ _no_of_threads = _no_of_cores; -+ _no_of_sockets = _no_of_cores; -+ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); -+ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); -+ _initialized = true; -+} -+ -+int VM_Version_Ext::number_of_threads(void) { -+ initialize_cpu_information(); -+ return _no_of_threads; -+} -+ -+int VM_Version_Ext::number_of_cores(void) { -+ initialize_cpu_information(); -+ return _no_of_cores; -+} -+ -+int VM_Version_Ext::number_of_sockets(void) { -+ initialize_cpu_information(); -+ return _no_of_sockets; -+} -+ -+const char* VM_Version_Ext::cpu_name(void) { -+ initialize_cpu_information(); -+ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); -+ if (NULL == tmp) { -+ return NULL; -+ } -+ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); -+ return tmp; -+} -+ -+const char* VM_Version_Ext::cpu_description(void) { -+ initialize_cpu_information(); -+ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); -+ if (NULL == tmp) { -+ return NULL; -+ } -+ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); -+ return tmp; -+} -diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp -new file mode 100644 -index 00000000000..711e4aeaf68 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#ifndef CPU_RISCV_VM_VERSION_EXT_RISCV_HPP -+#define CPU_RISCV_VM_VERSION_EXT_RISCV_HPP -+ -+#include "runtime/vm_version.hpp" -+#include "utilities/macros.hpp" -+ -+class VM_Version_Ext : public VM_Version { -+ private: -+ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; -+ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; -+ -+ static int _no_of_threads; -+ static int _no_of_cores; -+ static int _no_of_sockets; -+ static bool _initialized; -+ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; -+ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; -+ -+ public: -+ static int number_of_threads(void); -+ static int number_of_cores(void); -+ static int number_of_sockets(void); -+ -+ static const char* cpu_name(void); -+ static const char* cpu_description(void); -+ static void initialize_cpu_information(void); -+ -+}; -+ -+#endif // CPU_RISCV_VM_VERSION_EXT_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -index 2c15a834542..dd65f32277f 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -210,17 +210,3 @@ void VM_Version::c2_initialize() { - } - } - #endif // COMPILER2 -- --void VM_Version::initialize_cpu_information(void) { -- // do nothing if cpu info has been initialized -- if (_initialized) { -- return; -- } -- -- _no_of_cores = os::processor_count(); -- _no_of_threads = _no_of_cores; -- _no_of_sockets = _no_of_cores; -- snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); -- snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); -- _initialized = true; --} - -From 0cfdbd8595c710b71be008bb531b59acf9c4b016 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 17:16:05 +0800 -Subject: [PATCH 018/140] Revert JDK-8191278: MappedByteBuffer bulk access - memory failures are not handled gracefully - ---- - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 19 ++----------------- - 1 file changed, 2 insertions(+), 17 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -index 39416441bdf..8392b768847 100644 ---- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -1049,12 +1049,7 @@ class StubGenerator: public StubCodeGenerator { - __ push_reg(RegSet::of(d, count), sp); - } - -- { -- // UnsafeCopyMemory page error: continue after ucm -- bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); -- UnsafeCopyMemoryMark ucmm(this, add_entry, true); -- copy_memory(aligned, s, d, count, t0, size); -- } -+ copy_memory(aligned, s, d, count, t0, size); - - if (is_oop) { - __ pop_reg(RegSet::of(d, count), sp); -@@ -1122,12 +1117,7 @@ class StubGenerator: public StubCodeGenerator { - __ push_reg(RegSet::of(d, count), sp); - } - -- { -- // UnsafeCopyMemory page error: continue after ucm -- bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); -- UnsafeCopyMemoryMark ucmm(this, add_entry, true); -- copy_memory(aligned, s, d, count, t0, -size); -- } -+ copy_memory(aligned, s, d, count, t0, -size); - - if (is_oop) { - __ pop_reg(RegSet::of(d, count), sp); -@@ -3734,11 +3724,6 @@ class StubGenerator: public StubCodeGenerator { - ~StubGenerator() {} - }; // end class declaration - --#define UCM_TABLE_MAX_ENTRIES 8 - void StubGenerator_generate(CodeBuffer* code, bool all) { -- if (UnsafeCopyMemory::_table == NULL) { -- UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); -- } -- - 
StubGenerator g(code, all); - } - -From dd6a7c520a5adeef5b6686c161554adcba61113f Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 15:55:09 +0800 -Subject: [PATCH 019/140] Revert JDK-8282085: The REGISTER_DEFINITION macro is - useless after JDK-8269122 - ---- - .../cpu/riscv/register_definitions_riscv.cpp | 192 ++++++++++++++++++ - 1 file changed, 192 insertions(+) - create mode 100644 src/hotspot/cpu/riscv/register_definitions_riscv.cpp - -diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp -new file mode 100644 -index 00000000000..583f67573ca ---- /dev/null -+++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp -@@ -0,0 +1,192 @@ -+/* -+ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "asm/assembler.hpp" -+#include "asm/register.hpp" -+#include "interp_masm_riscv.hpp" -+#include "register_riscv.hpp" -+ -+REGISTER_DEFINITION(Register, noreg); -+ -+REGISTER_DEFINITION(Register, x0); -+REGISTER_DEFINITION(Register, x1); -+REGISTER_DEFINITION(Register, x2); -+REGISTER_DEFINITION(Register, x3); -+REGISTER_DEFINITION(Register, x4); -+REGISTER_DEFINITION(Register, x5); -+REGISTER_DEFINITION(Register, x6); -+REGISTER_DEFINITION(Register, x7); -+REGISTER_DEFINITION(Register, x8); -+REGISTER_DEFINITION(Register, x9); -+REGISTER_DEFINITION(Register, x10); -+REGISTER_DEFINITION(Register, x11); -+REGISTER_DEFINITION(Register, x12); -+REGISTER_DEFINITION(Register, x13); -+REGISTER_DEFINITION(Register, x14); -+REGISTER_DEFINITION(Register, x15); -+REGISTER_DEFINITION(Register, x16); -+REGISTER_DEFINITION(Register, x17); -+REGISTER_DEFINITION(Register, x18); -+REGISTER_DEFINITION(Register, x19); -+REGISTER_DEFINITION(Register, x20); -+REGISTER_DEFINITION(Register, x21); -+REGISTER_DEFINITION(Register, x22); -+REGISTER_DEFINITION(Register, x23); -+REGISTER_DEFINITION(Register, x24); -+REGISTER_DEFINITION(Register, x25); -+REGISTER_DEFINITION(Register, x26); -+REGISTER_DEFINITION(Register, x27); -+REGISTER_DEFINITION(Register, x28); -+REGISTER_DEFINITION(Register, x29); -+REGISTER_DEFINITION(Register, x30); -+REGISTER_DEFINITION(Register, x31); -+ -+REGISTER_DEFINITION(FloatRegister, fnoreg); -+ -+REGISTER_DEFINITION(FloatRegister, f0); -+REGISTER_DEFINITION(FloatRegister, f1); -+REGISTER_DEFINITION(FloatRegister, f2); -+REGISTER_DEFINITION(FloatRegister, f3); -+REGISTER_DEFINITION(FloatRegister, f4); -+REGISTER_DEFINITION(FloatRegister, f5); -+REGISTER_DEFINITION(FloatRegister, f6); -+REGISTER_DEFINITION(FloatRegister, f7); -+REGISTER_DEFINITION(FloatRegister, f8); -+REGISTER_DEFINITION(FloatRegister, f9); -+REGISTER_DEFINITION(FloatRegister, f10); -+REGISTER_DEFINITION(FloatRegister, f11); -+REGISTER_DEFINITION(FloatRegister, f12); -+REGISTER_DEFINITION(FloatRegister, f13); -+REGISTER_DEFINITION(FloatRegister, f14); -+REGISTER_DEFINITION(FloatRegister, f15); -+REGISTER_DEFINITION(FloatRegister, f16); -+REGISTER_DEFINITION(FloatRegister, f17); -+REGISTER_DEFINITION(FloatRegister, f18); -+REGISTER_DEFINITION(FloatRegister, f19); -+REGISTER_DEFINITION(FloatRegister, f20); -+REGISTER_DEFINITION(FloatRegister, f21); -+REGISTER_DEFINITION(FloatRegister, f22); -+REGISTER_DEFINITION(FloatRegister, f23); -+REGISTER_DEFINITION(FloatRegister, f24); -+REGISTER_DEFINITION(FloatRegister, f25); -+REGISTER_DEFINITION(FloatRegister, f26); -+REGISTER_DEFINITION(FloatRegister, f27); -+REGISTER_DEFINITION(FloatRegister, f28); -+REGISTER_DEFINITION(FloatRegister, f29); -+REGISTER_DEFINITION(FloatRegister, f30); -+REGISTER_DEFINITION(FloatRegister, f31); -+ -+REGISTER_DEFINITION(VectorRegister, vnoreg); -+ -+REGISTER_DEFINITION(VectorRegister, v0); -+REGISTER_DEFINITION(VectorRegister, v1); -+REGISTER_DEFINITION(VectorRegister, v2); -+REGISTER_DEFINITION(VectorRegister, v3); -+REGISTER_DEFINITION(VectorRegister, v4); -+REGISTER_DEFINITION(VectorRegister, v5); -+REGISTER_DEFINITION(VectorRegister, v6); -+REGISTER_DEFINITION(VectorRegister, v7); -+REGISTER_DEFINITION(VectorRegister, v8); -+REGISTER_DEFINITION(VectorRegister, v9); -+REGISTER_DEFINITION(VectorRegister, v10); -+REGISTER_DEFINITION(VectorRegister, v11); -+REGISTER_DEFINITION(VectorRegister, v12); -+REGISTER_DEFINITION(VectorRegister, v13); -+REGISTER_DEFINITION(VectorRegister, v14); 
-+REGISTER_DEFINITION(VectorRegister, v15); -+REGISTER_DEFINITION(VectorRegister, v16); -+REGISTER_DEFINITION(VectorRegister, v17); -+REGISTER_DEFINITION(VectorRegister, v18); -+REGISTER_DEFINITION(VectorRegister, v19); -+REGISTER_DEFINITION(VectorRegister, v20); -+REGISTER_DEFINITION(VectorRegister, v21); -+REGISTER_DEFINITION(VectorRegister, v22); -+REGISTER_DEFINITION(VectorRegister, v23); -+REGISTER_DEFINITION(VectorRegister, v24); -+REGISTER_DEFINITION(VectorRegister, v25); -+REGISTER_DEFINITION(VectorRegister, v26); -+REGISTER_DEFINITION(VectorRegister, v27); -+REGISTER_DEFINITION(VectorRegister, v28); -+REGISTER_DEFINITION(VectorRegister, v29); -+REGISTER_DEFINITION(VectorRegister, v30); -+REGISTER_DEFINITION(VectorRegister, v31); -+ -+REGISTER_DEFINITION(Register, c_rarg0); -+REGISTER_DEFINITION(Register, c_rarg1); -+REGISTER_DEFINITION(Register, c_rarg2); -+REGISTER_DEFINITION(Register, c_rarg3); -+REGISTER_DEFINITION(Register, c_rarg4); -+REGISTER_DEFINITION(Register, c_rarg5); -+REGISTER_DEFINITION(Register, c_rarg6); -+REGISTER_DEFINITION(Register, c_rarg7); -+ -+REGISTER_DEFINITION(FloatRegister, c_farg0); -+REGISTER_DEFINITION(FloatRegister, c_farg1); -+REGISTER_DEFINITION(FloatRegister, c_farg2); -+REGISTER_DEFINITION(FloatRegister, c_farg3); -+REGISTER_DEFINITION(FloatRegister, c_farg4); -+REGISTER_DEFINITION(FloatRegister, c_farg5); -+REGISTER_DEFINITION(FloatRegister, c_farg6); -+REGISTER_DEFINITION(FloatRegister, c_farg7); -+ -+REGISTER_DEFINITION(Register, j_rarg0); -+REGISTER_DEFINITION(Register, j_rarg1); -+REGISTER_DEFINITION(Register, j_rarg2); -+REGISTER_DEFINITION(Register, j_rarg3); -+REGISTER_DEFINITION(Register, j_rarg4); -+REGISTER_DEFINITION(Register, j_rarg5); -+REGISTER_DEFINITION(Register, j_rarg6); -+REGISTER_DEFINITION(Register, j_rarg7); -+ -+REGISTER_DEFINITION(FloatRegister, j_farg0); -+REGISTER_DEFINITION(FloatRegister, j_farg1); -+REGISTER_DEFINITION(FloatRegister, j_farg2); -+REGISTER_DEFINITION(FloatRegister, j_farg3); -+REGISTER_DEFINITION(FloatRegister, j_farg4); -+REGISTER_DEFINITION(FloatRegister, j_farg5); -+REGISTER_DEFINITION(FloatRegister, j_farg6); -+REGISTER_DEFINITION(FloatRegister, j_farg7); -+ -+REGISTER_DEFINITION(Register, zr); -+REGISTER_DEFINITION(Register, gp); -+REGISTER_DEFINITION(Register, tp); -+REGISTER_DEFINITION(Register, xmethod); -+REGISTER_DEFINITION(Register, ra); -+REGISTER_DEFINITION(Register, sp); -+REGISTER_DEFINITION(Register, fp); -+REGISTER_DEFINITION(Register, xheapbase); -+REGISTER_DEFINITION(Register, xcpool); -+REGISTER_DEFINITION(Register, xmonitors); -+REGISTER_DEFINITION(Register, xlocals); -+REGISTER_DEFINITION(Register, xthread); -+REGISTER_DEFINITION(Register, xbcp); -+REGISTER_DEFINITION(Register, xdispatch); -+REGISTER_DEFINITION(Register, esp); -+ -+REGISTER_DEFINITION(Register, t0); -+REGISTER_DEFINITION(Register, t1); -+REGISTER_DEFINITION(Register, t2); - -From 561261b051d88ddb0053733f03cbefc75dedcea8 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 16:41:03 +0800 -Subject: [PATCH 020/140] Revert JDK-7175279: Don't use x87 FPU on x86-64 - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 0e383a3c139..977563fe5f4 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -2019,6 +2019,18 @@ address LIR_Assembler::int_constant(jlong n) { - } - 
} - -+void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } -+ -+void LIR_Assembler::reset_FPU() { Unimplemented(); } -+ -+void LIR_Assembler::fpop() { Unimplemented(); } -+ -+void LIR_Assembler::fxch(int i) { Unimplemented(); } -+ -+void LIR_Assembler::fld(int i) { Unimplemented(); } -+ -+void LIR_Assembler::ffree(int i) { Unimplemented(); } -+ - void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { - __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, - Assembler::rl /* release */, t0, true /* result as bool */); - -From ff4e1443fd000208714b506d52c0fab1c91e4ac8 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 16:41:15 +0800 -Subject: [PATCH 021/140] Revert JDK-8255909: Remove unused delayed_value - methods - ---- - src/hotspot/cpu/riscv/assembler_riscv.hpp | 7 +++++++ - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 16 ++++++++++++++++ - src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 4 ++++ - 3 files changed, 27 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp -index 4923962a496..44e8d4b4ff1 100644 ---- a/src/hotspot/cpu/riscv/assembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -3027,6 +3027,13 @@ enum Nf { - Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(false) { - } - -+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, -+ Register tmp, -+ int offset) { -+ ShouldNotCallThis(); -+ return RegisterOrConstant(); -+ } -+ - // Stack overflow checking - virtual void bang_stack_with_offset(int offset) { Unimplemented(); } - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 269d76ba69e..878957cbede 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -191,6 +191,22 @@ void MacroAssembler::call_VM(Register oop_result, - void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} - void MacroAssembler::check_and_handle_popframe(Register java_thread) {} - -+RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, -+ Register tmp, -+ int offset) { -+ intptr_t value = *delayed_value_addr; -+ if (value != 0) -+ return RegisterOrConstant(value + offset); -+ -+ // load indirectly to solve generation ordering problem -+ ld(tmp, ExternalAddress((address) delayed_value_addr)); -+ -+ if (offset != 0) -+ add(tmp, tmp, offset); -+ -+ return RegisterOrConstant(tmp); -+} -+ - // Calls to C land - // - // When entering C land, the fp, & esp of the last Java frame have to be recorded -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index b59bdadb8bf..f23f7e7d1e6 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -625,6 +625,10 @@ class MacroAssembler: public Assembler { - - void reserved_stack_check(); - -+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, -+ Register tmp, -+ int offset); -+ - void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); - void read_polling_page(Register r, address page, relocInfo::relocType rtype); - void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); - -From afe35a3fdc705645bfe2a2e797a95ce1d5203872 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 16:51:39 
+0800 -Subject: [PATCH 022/140] Revert JDK-8263679: C1: Remove vtable call - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 977563fe5f4..a0ecc63d851 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -1382,6 +1382,11 @@ void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { - add_call_info(code_offset(), op->info()); - } - -+/* Currently, vtable-dispatch is only enabled for sparc platforms */ -+void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { -+ ShouldNotReachHere(); -+} -+ - void LIR_Assembler::emit_static_call_stub() { - address call_pc = __ pc(); - assert((__ offset() % 4) == 0, "bad alignment"); - -From 655b34c00ec5ff6fa7e82de96a78a0c58ba91985 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 16:55:57 +0800 -Subject: [PATCH 023/140] Revert JDK-8264063: Outer Safepoint poll load should - not reference the head of inner strip mined loop. - ---- - src/hotspot/cpu/riscv/riscv.ad | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 8c7a8ede815..fcddf752564 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -952,6 +952,20 @@ int CallDynamicJavaDirectNode::compute_padding(int current_offset) const - return align_up(current_offset, alignment_required()) - current_offset; - } - -+// Indicate if the safepoint node needs the polling page as an input -+ -+// the shared code plants the oop data at the start of the generated -+// code for the safepoint node and that needs ot be at the load -+// instruction itself. so we cannot plant a mov of the safepoint poll -+// address followed by a load. setting this to true means the mov is -+// scheduled as a prior instruction. that's better for scheduling -+// anyway. -+ -+bool SafePointNode::needs_polling_address_input() -+{ -+ return true; -+} -+ - //============================================================================= - - #ifndef PRODUCT - -From 4a6f7dafdb4e0cf054b7867de60f789d4ca1d9f3 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:26:29 +0800 -Subject: [PATCH 024/140] Revert: JDK-8266810: Move trivial Matcher code to - cpu-specific header files JDK-8254966: Remove unused code from Matcher - ---- - src/hotspot/cpu/riscv/matcher_riscv.hpp | 129 ------------------------ - src/hotspot/cpu/riscv/riscv.ad | 108 +++++++++++++++++++- - 2 files changed, 107 insertions(+), 130 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/matcher_riscv.hpp - -diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp -deleted file mode 100644 -index 4c7fabd7240..00000000000 ---- a/src/hotspot/cpu/riscv/matcher_riscv.hpp -+++ /dev/null -@@ -1,129 +0,0 @@ --/* -- * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. 
-- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#ifndef CPU_RISCV_MATCHER_RISCV_HPP --#define CPU_RISCV_MATCHER_RISCV_HPP -- -- // Defined within class Matcher -- -- // false => size gets scaled to BytesPerLong, ok. -- static const bool init_array_count_is_in_bytes = false; -- -- // riscv doesn't support misaligned vectors store/load on JDK11. -- static constexpr bool misaligned_vectors_ok() { -- return false; -- } -- -- // Whether code generation need accurate ConvI2L types. -- static const bool convi2l_type_required = false; -- -- // Does the CPU require late expand (see block.cpp for description of late expand)? -- static const bool require_postalloc_expand = false; -- -- // Do we need to mask the count passed to shift instructions or does -- // the cpu only look at the lower 5/6 bits anyway? -- static const bool need_masked_shift_count = false; -- -- static constexpr bool isSimpleConstant64(jlong value) { -- // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. -- // Probably always true, even if a temp register is required. -- return true; -- } -- -- // Use conditional move (CMOVL) -- static constexpr int long_cmove_cost() { -- // long cmoves are no more expensive than int cmoves -- return 0; -- } -- -- static constexpr int float_cmove_cost() { -- // float cmoves are no more expensive than int cmoves -- return 0; -- } -- -- // This affects two different things: -- // - how Decode nodes are matched -- // - how ImplicitNullCheck opportunities are recognized -- // If true, the matcher will try to remove all Decodes and match them -- // (as operands) into nodes. NullChecks are not prepared to deal with -- // Decodes by final_graph_reshaping(). -- // If false, final_graph_reshaping() forces the decode behind the Cmp -- // for a NullCheck. The matcher matches the Decode node into a register. -- // Implicit_null_check optimization moves the Decode along with the -- // memory operation back up before the NullCheck. -- static bool narrow_oop_use_complex_address() { -- return CompressedOops::shift() == 0; -- } -- -- static bool narrow_klass_use_complex_address() { -- return false; -- } -- -- static bool const_oop_prefer_decode() { -- // Prefer ConN+DecodeN over ConP in simple compressed oops mode. -- return CompressedOops::base() == NULL; -- } -- -- static bool const_klass_prefer_decode() { -- // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. -- return CompressedKlassPointers::base() == NULL; -- } -- -- // Is it better to copy float constants, or load them directly from -- // memory? Intel can load a float constant from a direct address, -- // requiring no extra registers. Most RISCs will have to materialize -- // an address into a register first, so they would do better to copy -- // the constant from stack. 
-- static const bool rematerialize_float_constants = false; -- -- // If CPU can load and store mis-aligned doubles directly then no -- // fixup is needed. Else we split the double into 2 integer pieces -- // and move it piece-by-piece. Only happens when passing doubles into -- // C code as the Java calling convention forces doubles to be aligned. -- static const bool misaligned_doubles_ok = true; -- -- // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. -- static const bool strict_fp_requires_explicit_rounding = false; -- -- // Are floats converted to double when stored to stack during -- // deoptimization? -- static constexpr bool float_in_double() { return false; } -- -- // Do ints take an entire long register or just half? -- // The relevant question is how the int is callee-saved: -- // the whole long is written but de-opt'ing will have to extract -- // the relevant 32 bits. -- static const bool int_in_long = true; -- -- // true means we have fast l2f convers -- // false means that conversion is done by runtime call -- static constexpr bool convL2FSupported(void) { -- return true; -- } -- -- // Implements a variant of EncodeISOArrayNode that encode ASCII only -- static const bool supports_encode_ascii_array = false; -- --#endif // CPU_RISCV_MATCHER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index fcddf752564..a9e5f2e6841 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -330,7 +330,9 @@ alloc_class chunk2(RFLAGS); - // Several register classes are automatically defined based upon information in - // this architecture description. - // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) --// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) -+// 2) reg_class compiler_method_reg ( /* as def'd in frame section */ ) -+// 2) reg_class interpreter_method_reg ( /* as def'd in frame section */ ) -+// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) - // - - // Class for all 32 bit general purpose registers -@@ -1548,6 +1550,17 @@ bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { - return (-4096 <= offs && offs < 4096); - } - -+const bool Matcher::isSimpleConstant64(jlong value) { -+ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. -+ // Probably always true, even if a temp register is required. -+ return true; -+} -+ -+// true just means we have fast l2f conversion -+const bool Matcher::convL2FSupported(void) { -+ return true; -+} -+ - // Vector width in bytes. - const int Matcher::vector_width_in_bytes(BasicType bt) { - return 0; -@@ -1567,6 +1580,94 @@ const uint Matcher::vector_ideal_reg(int len) { - return 0; - } - -+// RISC-V supports misaligned vectors store/load. -+const bool Matcher::misaligned_vectors_ok() { -+ return true; -+} -+ -+// false => size gets scaled to BytesPerLong, ok. -+const bool Matcher::init_array_count_is_in_bytes = false; -+ -+// Use conditional move (CMOVL) -+const int Matcher::long_cmove_cost() { -+ // long cmoves are no more expensive than int cmoves -+ return 0; -+} -+ -+const int Matcher::float_cmove_cost() { -+ // float cmoves are no more expensive than int cmoves -+ return 0; -+} -+ -+// Does the CPU require late expand (see block.cpp for description of late expand)? 
-+const bool Matcher::require_postalloc_expand = false; -+ -+// Do we need to mask the count passed to shift instructions or does -+// the cpu only look at the lower 5/6 bits anyway? -+const bool Matcher::need_masked_shift_count = false; -+ -+// This affects two different things: -+// - how Decode nodes are matched -+// - how ImplicitNullCheck opportunities are recognized -+// If true, the matcher will try to remove all Decodes and match them -+// (as operands) into nodes. NullChecks are not prepared to deal with -+// Decodes by final_graph_reshaping(). -+// If false, final_graph_reshaping() forces the decode behind the Cmp -+// for a NullCheck. The matcher matches the Decode node into a register. -+// Implicit_null_check optimization moves the Decode along with the -+// memory operation back up before the NullCheck. -+bool Matcher::narrow_oop_use_complex_address() { -+ return Universe::narrow_oop_shift() == 0; -+} -+ -+bool Matcher::narrow_klass_use_complex_address() { -+// TODO -+// decide whether we need to set this to true -+ return false; -+} -+ -+bool Matcher::const_oop_prefer_decode() { -+ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. -+ return Universe::narrow_oop_base() == NULL; -+} -+ -+bool Matcher::const_klass_prefer_decode() { -+ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. -+ return Universe::narrow_klass_base() == NULL; -+} -+ -+// Is it better to copy float constants, or load them directly from -+// memory? Intel can load a float constant from a direct address, -+// requiring no extra registers. Most RISCs will have to materialize -+// an address into a register first, so they would do better to copy -+// the constant from stack. -+const bool Matcher::rematerialize_float_constants = false; -+ -+// If CPU can load and store mis-aligned doubles directly then no -+// fixup is needed. Else we split the double into 2 integer pieces -+// and move it piece-by-piece. Only happens when passing doubles into -+// C code as the Java calling convention forces doubles to be aligned. -+const bool Matcher::misaligned_doubles_ok = true; -+ -+// No-op on amd64 -+void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { -+ Unimplemented(); -+} -+ -+// Advertise here if the CPU requires explicit rounding operations to -+// implement the UseStrictFP mode. -+const bool Matcher::strict_fp_requires_explicit_rounding = false; -+ -+// Are floats converted to double when stored to stack during -+// deoptimization? -+bool Matcher::float_in_double() { return false; } -+ -+// Do ints take an entire long register or just half? -+// The relevant question is how the int is callee-saved: -+// the whole long is written but de-opt'ing will have to extract -+// the relevant 32 bits. -+const bool Matcher::int_in_long = true; -+ - // Return whether or not this register is ever used as an argument. - // This function is used on startup to build the trampoline stubs in - // generateOptoStub. Registers not mentioned will be killed by the VM -@@ -1671,6 +1772,8 @@ bool size_fits_all_mem_uses(AddPNode* addp, int shift) { - return true; - } - -+const bool Matcher::convi2l_type_required = false; -+ - // Should the Matcher clone input 'm' of node 'n'? - bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { - assert_cond(m != NULL); -@@ -2250,6 +2353,9 @@ frame %{ - // Inline Cache Register or methodOop for I2C. - inline_cache_reg(R31); - -+ // Method Oop Register when calling interpreter. 
-+ interpreter_method_oop_reg(R31); -+ - // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] - cisc_spilling_operand_name(indOffset); - - -From 4b0f20882cd9b5e5da92d61c2fa02e0cbea0ef0c Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:30:42 +0800 -Subject: [PATCH 025/140] Revert JDK-8256238: Remove - Matcher::pass_original_key_for_aes - ---- - src/hotspot/cpu/riscv/riscv.ad | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index a9e5f2e6841..0d1afd5584a 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1580,6 +1580,11 @@ const uint Matcher::vector_ideal_reg(int len) { - return 0; - } - -+// AES support not yet implemented -+const bool Matcher::pass_original_key_for_aes() { -+ return false; -+} -+ - // RISC-V supports misaligned vectors store/load. - const bool Matcher::misaligned_vectors_ok() { - return true; - -From 36d7ecedbcd95911d1b355bbab3e8fdf81b36e7d Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:42:37 +0800 -Subject: [PATCH 026/140] Revert JDK-8242492: C2: Remove - Matcher::vector_shift_count_ideal_reg() - ---- - src/hotspot/cpu/riscv/riscv.ad | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 0d1afd5584a..c10e91633a5 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1580,6 +1580,11 @@ const uint Matcher::vector_ideal_reg(int len) { - return 0; - } - -+const uint Matcher::vector_shift_count_ideal_reg(int size) { -+ fatal("vector shift is not supported"); -+ return Node::NotAMachineReg; -+} -+ - // AES support not yet implemented - const bool Matcher::pass_original_key_for_aes() { - return false; - -From b78e448a460fcdc66553e66342e93e5ac87c0c61 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:47:13 +0800 -Subject: [PATCH 027/140] Revert JDK-8266937: Remove Compile::reshape_address - ---- - src/hotspot/cpu/riscv/riscv.ad | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index c10e91633a5..2c5ec0451b8 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1801,6 +1801,9 @@ bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, - return clone_base_plus_offset_address(m, mstack, address_visited); - } - -+void Compile::reshape_address(AddPNode* addp) { -+} -+ - %} - - - -From cd34a5ce5d120cdac939217976d1e7b7e98bf654 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:49:09 +0800 -Subject: [PATCH 028/140] Revert JDK-8272771: frame::pd_ps() is not implemented - on any platform - ---- - src/hotspot/cpu/riscv/frame_riscv.cpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 8e7babe2c61..8e4f20fe561 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -683,6 +683,7 @@ frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { - init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); - } - -+void frame::pd_ps() {} - #endif - - void JavaFrameAnchor::make_walkable(JavaThread* thread) { - -From bdb16daf6d809d0c38256be99ecbe922d24b889b Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:56:27 +0800 -Subject: [PATCH 029/140] Revert JDK-8268858: Determine register pressure - 
automatically by the number of available registers for allocation - ---- - src/hotspot/cpu/riscv/riscv.ad | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 2c5ec0451b8..a6aa52de29e 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1527,6 +1527,10 @@ const bool Matcher::has_predicated_vectors(void) { - return false; - } - -+const int Matcher::float_pressure(int default_pressure_threshold) { -+ return default_pressure_threshold; -+} -+ - // Is this branch offset short enough that a short branch can be used? - // - // NOTE: If the platform does not provide any short branch variants, then - -From bbaa7a97b5d8110ead9dc44f31e2c5fe3bcd83d5 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:58:16 +0800 -Subject: [PATCH 030/140] Revert JDK-8253040: Remove unused - Matcher::regnum_to_fpu_offset() - ---- - src/hotspot/cpu/riscv/riscv.ad | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index a6aa52de29e..2d847cb6454 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1531,6 +1531,12 @@ const int Matcher::float_pressure(int default_pressure_threshold) { - return default_pressure_threshold; - } - -+int Matcher::regnum_to_fpu_offset(int regnum) -+{ -+ Unimplemented(); -+ return 0; -+} -+ - // Is this branch offset short enough that a short branch can be used? - // - // NOTE: If the platform does not provide any short branch variants, then - -From ce9ad0af72e405153534369bff1b1725697f3e40 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 18:03:23 +0800 -Subject: [PATCH 031/140] Revert JDK-8254084: Remove - TemplateTable::pd_initialize - ---- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index 4e388ac4eaa..c9d399ccdaf 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -48,6 +48,12 @@ - - #define __ _masm-> - -+// Platform-dependent initialization -+ -+void TemplateTable::pd_initialize() { -+ // No RISC-V specific initialization -+} -+ - // Address computation: local variables - - static inline Address iaddress(int n) { - -From 49429187846e6f2b00ab2853e27097eae274a947 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 20:17:07 +0800 -Subject: [PATCH 032/140] Revert JDK-8224815: 8224815: Remove non-GC uses of - CollectedHeap::is_in_reserved() - ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 878957cbede..cf01d7d74bb 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1632,7 +1632,7 @@ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { - #ifdef ASSERT - { - ThreadInVMfromUnknown tiv; -- assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); -+ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); - } - #endif - oop_index = oop_recorder()->find_index(obj); -@@ -2800,7 +2800,7 @@ void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { - assert (UseCompressedOops, "should 
only be used for compressed oops"); - assert (Universe::heap() != NULL, "java heap should be initialized"); - assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); -- assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); -+ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); - } - #endif - int oop_index = oop_recorder()->find_index(obj); -@@ -2815,7 +2815,7 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { - assert (UseCompressedClassPointers, "should only be used for compressed headers"); - assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); - int index = oop_recorder()->find_index(k); -- assert(!Universe::heap()->is_in(k), "should not be an oop"); -+ assert(!Universe::heap()->is_in_reserved(k), "should not be an oop"); - - InstructionMark im(this); - RelocationHolder rspec = metadata_Relocation::spec(index); - -From a71fabb1ff05db9955557a888be6cd1b5f87deea Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 21:14:30 +0800 -Subject: [PATCH 033/140] Revert JDK-8253540: InterpreterRuntime::monitorexit - should be a JRT_LEAF function - ---- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 8adc7b1320d..48957803fdc 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -839,7 +839,9 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) - assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); - - if (UseHeavyMonitors) { -- call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), -+ lock_reg); - } else { - Label done; - -@@ -871,7 +873,9 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) - - // Call the runtime routine for slow case. 
- sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj -- call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), -+ lock_reg); - - bind(done); - - -From a0b18eea3c83ef8f1de2c1b3cd55452f0f6b9af2 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Fri, 7 Apr 2023 12:51:33 +0800 -Subject: [PATCH 034/140] Revert JDK-8278387: riscv: Implement UseHeavyMonitors - consistently && JDK-8279826: riscv: Preserve result in native wrapper with - +UseHeavyMonitors - ---- - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 8 +- - src/hotspot/cpu/riscv/riscv.ad | 92 +++++++++---------- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 85 ++++++++--------- - 3 files changed, 80 insertions(+), 105 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index a0ecc63d851..dd657963438 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -367,11 +367,7 @@ int LIR_Assembler::emit_unwind_handler() { - if (method()->is_synchronized()) { - monitor_address(0, FrameMap::r10_opr); - stub = new MonitorExitStub(FrameMap::r10_opr, true, 0); -- if (UseHeavyMonitors) { -- __ j(*stub->entry()); -- } else { -- __ unlock_object(x15, x14, x10, *stub->entry()); -- } -+ __ unlock_object(x15, x14, x10, *stub->entry()); - __ bind(*stub->continuation()); - } - -@@ -1512,7 +1508,7 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { - Register obj = op->obj_opr()->as_register(); // may not be an oop - Register hdr = op->hdr_opr()->as_register(); - Register lock = op->lock_opr()->as_register(); -- if (UseHeavyMonitors) { -+ if (!UseFastLocking) { - __ j(*op->stub()->entry()); - } else if (op->code() == lir_lock) { - assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 2d847cb6454..29027d594a0 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2109,40 +2109,36 @@ encode %{ - __ andi(t0, disp_hdr, markOopDesc::monitor_value); - __ bnez(t0, object_has_monitor); - -- if (!UseHeavyMonitors) { -- // Set tmp to be (markWord of object | UNLOCK_VALUE). -- __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); -- -- // Initialize the box. (Must happen before we update the object mark!) -- __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -- -- // Compare object markWord with an unlocked value (tmp) and if -- // equal exchange the stack address of our box with object markWord. -- // On failure disp_hdr contains the possibly locked markWord. -- __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, -- Assembler::rl, /*result*/disp_hdr); -- __ mv(flag, zr); -- __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas -- -- assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -- -- // If the compare-and-exchange succeeded, then we found an unlocked -- // object, will have now locked it will continue at label cont -- // We did not see an unlocked object so try the fast recursive case. -- -- // Check if the owner is self by comparing the value in the -- // markWord of object (disp_hdr) with the stack pointer. 
-- __ sub(disp_hdr, disp_hdr, sp); -- __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); -- // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, -- // hence we can store 0 as the displaced header in the box, which indicates that it is a -- // recursive lock. -- __ andr(tmp/*==0?*/, disp_hdr, tmp); -- __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); -- __ mv(flag, tmp); // we can use the value of tmp as the result here -- } else { -- __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path -- } -+ // Set tmp to be (markWord of object | UNLOCK_VALUE). -+ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); -+ -+ // Initialize the box. (Must happen before we update the object mark!) -+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ -+ // Compare object markWord with an unlocked value (tmp) and if -+ // equal exchange the stack address of our box with object markWord. -+ // On failure disp_hdr contains the possibly locked markWord. -+ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/disp_hdr); -+ __ mv(flag, zr); -+ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas -+ -+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -+ -+ // If the compare-and-exchange succeeded, then we found an unlocked -+ // object, will have now locked it will continue at label cont -+ // We did not see an unlocked object so try the fast recursive case. -+ -+ // Check if the owner is self by comparing the value in the -+ // markWord of object (disp_hdr) with the stack pointer. -+ __ sub(disp_hdr, disp_hdr, sp); -+ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); -+ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, -+ // hence we can store 0 as the displaced header in the box, which indicates that it is a -+ // recursive lock. -+ __ andr(tmp/*==0?*/, disp_hdr, tmp); -+ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ __ mv(flag, tmp); // we can use the value of tmp as the result here - - __ j(cont); - -@@ -2189,31 +2185,25 @@ encode %{ - - assert_different_registers(oop, box, tmp, disp_hdr, flag); - -- if (!UseHeavyMonitors) { -- // Find the lock address and load the displaced header from the stack. -- __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ // Find the lock address and load the displaced header from the stack. -+ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); - -- // If the displaced header is 0, we have a recursive unlock. -- __ mv(flag, disp_hdr); -- __ beqz(disp_hdr, cont); -- } -+ // If the displaced header is 0, we have a recursive unlock. -+ __ mv(flag, disp_hdr); -+ __ beqz(disp_hdr, cont); - - // Handle existing monitor. - __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); - __ andi(t0, disp_hdr, markOopDesc::monitor_value); - __ bnez(t0, object_has_monitor); - -- if (!UseHeavyMonitors) { -- // Check if it is still a light weight lock, this is true if we -- // see the stack address of the basicLock in the markWord of the -- // object. -+ // Check if it is still a light weight lock, this is true if we -+ // see the stack address of the basicLock in the markWord of the -+ // object. 
- -- __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, -- Assembler::rl, /*result*/tmp); -- __ xorr(flag, box, tmp); // box == tmp if cas succeeds -- } else { -- __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path -- } -+ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, -+ Assembler::rl, /*result*/tmp); -+ __ xorr(flag, box, tmp); // box == tmp if cas succeeds - __ j(cont); - - assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 21aa3b58c09..5203200b068 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1488,39 +1488,35 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - // Load the oop from the handle - __ ld(obj_reg, Address(oop_handle_reg, 0)); - -- if (!UseHeavyMonitors) { -- // Load (object->mark() | 1) into swap_reg % x10 -- __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -- __ ori(swap_reg, t0, 1); -- -- // Save (object->mark() | 1) into BasicLock's displaced header -- __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -- -- // src -> dest if dest == x10 else x10 <- dest -- { -- Label here; -- __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); -- } -+ // Load (object->mark() | 1) into swap_reg % x10 -+ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ __ ori(swap_reg, t0, 1); - -- // Test if the oopMark is an obvious stack pointer, i.e., -- // 1) (mark & 3) == 0, and -- // 2) sp <= mark < mark + os::pagesize() -- // These 3 tests can be done by evaluating the following -- // expression: ((mark - sp) & (3 - os::vm_page_size())), -- // assuming both stack pointer and pagesize have their -- // least significant 2 bits clear. -- // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg -- -- __ sub(swap_reg, swap_reg, sp); -- __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); -- -- // Save the test result, for recursive case, the result is zero -- __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -- __ bnez(swap_reg, slow_path_lock); -- } else { -- __ j(slow_path_lock); -+ // Save (object->mark() | 1) into BasicLock's displaced header -+ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -+ -+ // src -> dest if dest == x10 else x10 <- dest -+ { -+ Label here; -+ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); - } - -+ // Test if the oopMark is an obvious stack pointer, i.e., -+ // 1) (mark & 3) == 0, and -+ // 2) sp <= mark < mark + os::pagesize() -+ // These 3 tests can be done by evaluating the following -+ // expression: ((mark - sp) & (3 - os::vm_page_size())), -+ // assuming both stack pointer and pagesize have their -+ // least significant 2 bits clear. 
-+ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg -+ -+ __ sub(swap_reg, swap_reg, sp); -+ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); -+ -+ // Save the test result, for recursive case, the result is zero -+ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -+ __ bnez(swap_reg, slow_path_lock); -+ - // Slow path will re-enter here - __ bind(lock_done); - } -@@ -1608,31 +1604,24 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - - Label done; - -- if (!UseHeavyMonitors) { -- // Simple recursive lock? -- __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -- __ beqz(t0, done); -- } -- -+ // Simple recursive lock? -+ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ __ beqz(t0, done); - - // Must save x10 if if it is live now because cmpxchg must use it - if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { - save_native_result(masm, ret_type, stack_slots); - } - -- if (!UseHeavyMonitors) { -- // get address of the stack lock -- __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -- // get old displaced header -- __ ld(old_hdr, Address(x10, 0)); -+ // get address of the stack lock -+ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ // get old displaced header -+ __ ld(old_hdr, Address(x10, 0)); - -- // Atomic swap old header if oop still contains the stack lock -- Label succeed; -- __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); -- __ bind(succeed); -- } else { -- __ j(slow_path_unlock); -- } -+ // Atomic swap old header if oop still contains the stack lock -+ Label succeed; -+ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); -+ __ bind(succeed); - - // slow path re-enters here - __ bind(unlock_done); - -From 1e844b8019cb3516c0843826de2bd3fcd2222f41 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 16:49:19 +0800 -Subject: [PATCH 035/140] Revert JDK-8258192: Obsolete the CriticalJNINatives - flag. CriticalJNINatives is unimplemented() even on AArch64. See - https://bugs.openjdk.org/browse/JDK-8254694. 
- -Also following up 8191129: AARCH64: Invalid value passed to critical JNI function ---- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 3 ++- - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 2 ++ - .../criticalnatives/argumentcorruption/CheckLongArgs.java | 2 +- - .../jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java | 2 +- - 4 files changed, 6 insertions(+), 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 5203200b068..f8585afbdc2 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1111,7 +1111,8 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - int compile_id, - BasicType* in_sig_bt, - VMRegPair* in_regs, -- BasicType ret_type) { -+ BasicType ret_type, -+ address critical_entry) { - if (method->is_method_handle_intrinsic()) { - vmIntrinsics::ID iid = method->intrinsic_id(); - intptr_t start = (intptr_t)__ pc(); -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -index dd65f32277f..c0491d23fa6 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -147,6 +147,8 @@ void VM_Version::initialize() { - #ifdef COMPILER2 - c2_initialize(); - #endif // COMPILER2 -+ -+ UNSUPPORTED_OPTION(CriticalJNINatives); - } - - #ifdef COMPILER2 -diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -index acb86812d25..2c866f26f08 100644 ---- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -+++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -@@ -24,7 +24,7 @@ - - /* @test - * @bug 8167409 -- * @requires (os.arch != "aarch64") & (os.arch != "arm") -+ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") - * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs - */ - package compiler.runtime.criticalnatives.argumentcorruption; -diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -index eab36f93113..1da369fde23 100644 ---- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -+++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -@@ -24,7 +24,7 @@ - - /* @test - * @bug 8167408 -- * @requires (os.arch != "aarch64") & (os.arch != "arm") -+ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") - * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp - */ - package compiler.runtime.criticalnatives.lookup; - -From 58ad930e78501c6fad024e7ef05066ec19eb6219 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 11 Apr 2023 11:45:04 +0800 -Subject: [PATCH 036/140] 8202976: Add C1 lea patching support for x86 (RISC-V - part) - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index dd657963438..46a20a64194 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -1818,6 
+1818,7 @@ void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, C - return; - } - -+ assert(patch_code == lir_patch_none, "Patch code not supported"); - LIR_Address* adr = addr->as_address_ptr(); - Register dst = dest->as_register_lo(); - - -From 2074b8ec0ea3562f3999b4f4010b3f5b57dbe502 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 11 Apr 2023 12:15:44 +0800 -Subject: [PATCH 037/140] Revert 8232365: Implementation for JEP 363: Remove - the Concurrent Mark Sweep (CMS) Garbage Collector - ---- - src/hotspot/cpu/riscv/globals_riscv.hpp | 3 +++ - src/hotspot/cpu/riscv/riscv.ad | 27 +++++++++++++++++++++++++ - 2 files changed, 30 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index 845064d6cbc..50bbb6a77b8 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -63,6 +63,9 @@ define_pd_global(bool, RewriteFrequentPairs, true); - - define_pd_global(bool, PreserveFramePointer, false); - -+// GC Ergo Flags -+define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread -+ - define_pd_global(uintx, TypeProfileLevel, 111); - - define_pd_global(bool, CompactStrings, true); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 29027d594a0..386ef731696 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -752,6 +752,9 @@ bool is_CAS(int opcode, bool maybe_volatile); - // predicate controlling translation of CompareAndSwapX - bool needs_acquiring_load_reserved(const Node *load); - -+// predicate controlling translation of StoreCM -+bool unnecessary_storestore(const Node *storecm); -+ - // predicate controlling addressing modes - bool size_fits_all_mem_uses(AddPNode* addp, int shift); - %} -@@ -874,6 +877,29 @@ bool needs_acquiring_load_reserved(const Node *n) - // so we can just return true here - return true; - } -+ -+// predicate controlling translation of StoreCM -+// -+// returns true if a StoreStore must precede the card write otherwise -+// false -+ -+bool unnecessary_storestore(const Node *storecm) -+{ -+ assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); -+ -+ // we need to generate a dmb ishst between an object put and the -+ // associated card mark when we are using CMS without conditional -+ // card marking -+ -+ if (UseConcMarkSweepGC && !UseCondCardMark) { -+ return false; -+ } -+ -+ // a storestore is unnecesary in all other cases -+ -+ return true; -+} -+ - #define __ _masm. 
- - // advance declarations for helper functions to convert register -@@ -4566,6 +4592,7 @@ instruct loadConD0(fRegD dst, immD0 con) %{ - instruct storeimmCM0(immI0 zero, memory mem) - %{ - match(Set mem (StoreCM mem zero)); -+ predicate(unnecessary_storestore(n)); - - ins_cost(STORE_COST); - format %{ "storestore (elided)\n\t" - -From f838cf41b48c6bc17d052531ab5594de236b1302 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 11 Apr 2023 22:06:58 +0800 -Subject: [PATCH 038/140] Revert 8220051: Remove global safepoint code - ---- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 3 +- - .../cpu/riscv/macroAssembler_riscv.cpp | 26 ++++++++++- - .../cpu/riscv/macroAssembler_riscv.hpp | 3 +- - src/hotspot/cpu/riscv/riscv.ad | 43 +++++++++++++++++++ - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 4 +- - .../templateInterpreterGenerator_riscv.cpp | 2 +- - 6 files changed, 75 insertions(+), 6 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 48957803fdc..74dded77d19 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -515,7 +515,8 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, - - Label safepoint; - address* const safepoint_table = Interpreter::safept_table(state); -- bool needs_thread_local_poll = generate_poll && table != safepoint_table; -+ bool needs_thread_local_poll = generate_poll && -+ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; - - if (needs_thread_local_poll) { - NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index cf01d7d74bb..73629e3dba3 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -264,6 +264,30 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp, - } - } - -+// Just like safepoint_poll, but use an acquiring load for thread- -+// local polling. -+// -+// We need an acquire here to ensure that any subsequent load of the -+// global SafepointSynchronize::_state flag is ordered after this load -+// of the local Thread::_polling page. We don't want this poll to -+// return false (i.e. not safepointing) and a later poll of the global -+// SafepointSynchronize::_state spuriously to return true. -+// -+// This is to avoid a race when we're in a native->Java transition -+// racing the code which wakes up from a safepoint. 
-+// -+void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ membar(MacroAssembler::AnyAny); -+ ld(t1, Address(xthread, Thread::polling_page_offset())); -+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ andi(t0, t1, SafepointMechanism::poll_bit()); -+ bnez(t0, slow_path); -+ } else { -+ safepoint_poll(slow_path); -+ } -+} -+ - void MacroAssembler::reset_last_Java_frame(bool clear_fp) { - // we must set sp to zero to clear frame - sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); -@@ -2137,7 +2161,7 @@ void MacroAssembler::check_klass_subtype(Register sub_klass, - bind(L_failure); - } - --void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { -+void MacroAssembler::safepoint_poll(Label& slow_path) { - if (SafepointMechanism::uses_thread_local_poll()) { - ld(t1, Address(xthread, Thread::polling_page_offset())); - andi(t0, t1, SafepointMechanism::poll_bit()); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index f23f7e7d1e6..8a2c6e07d88 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -44,7 +44,8 @@ class MacroAssembler: public Assembler { - } - virtual ~MacroAssembler() {} - -- void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod); -+ void safepoint_poll(Label& slow_path); -+ void safepoint_poll_acquire(Label& slow_path); - - // Place a fence.i after code may have been modified due to a safepoint. - void safepoint_ifence(); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 386ef731696..2dde4453dac 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1213,6 +1213,14 @@ const Pipeline * MachEpilogNode::pipeline() const { - return MachNode::pipeline_class(); - } - -+// This method seems to be obsolete. It is declared in machnode.hpp -+// and defined in all *.ad files, but it is never called. Should we -+// get rid of it? 
-+int MachEpilogNode::safepoint_offset() const { -+ assert(do_polling(), "no return for this epilog node"); -+ return 4; -+} -+ - //============================================================================= - - // Figure out which register class each belongs in: rc_int, rc_float or -@@ -1907,6 +1915,17 @@ encode %{ - __ li(dst_reg, 1); - %} - -+ enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{ -+ MacroAssembler _masm(&cbuf); -+ int32_t offset = 0; -+ address page = (address)$src$$constant; -+ unsigned long align = (unsigned long)page & 0xfff; -+ assert(align == 0, "polling page must be page aligned"); -+ Register dst_reg = as_Register($dst$$reg); -+ __ la_patchable(dst_reg, Address(page, relocInfo::poll_type), offset); -+ __ addi(dst_reg, dst_reg, offset); -+ %} -+ - enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ - C2_MacroAssembler _masm(&cbuf); - __ load_byte_map_base($dst$$Register); -@@ -2688,6 +2707,17 @@ operand immP_1() - interface(CONST_INTER); - %} - -+// Polling Page Pointer Immediate -+operand immPollPage() -+%{ -+ predicate((address)n->get_ptr() == os::get_polling_page()); -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ - // Card Table Byte Map Base - operand immByteMapBase() - %{ -@@ -4476,6 +4506,19 @@ instruct loadConP1(iRegPNoSp dst, immP_1 con) - ins_pipe(ialu_imm); - %} - -+// Load Poll Page Constant -+instruct loadConPollPage(iRegPNoSp dst, immPollPage con) -+%{ -+ match(Set dst con); -+ -+ ins_cost(ALU_COST * 6); -+ format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} -+ -+ ins_encode(riscv_enc_mov_poll_page(dst, con)); -+ -+ ins_pipe(ialu_imm); -+%} -+ - // Load Byte Map Base Constant - instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) - %{ -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index f8585afbdc2..c501c8f7bac 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1573,7 +1573,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - // This is to avoid a race when we're in a native->Java transition - // racing the code which wakes up from a safepoint. - -- __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */); -+ __ safepoint_poll_acquire(safepoint_in_progress); - __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); - __ bnez(t0, safepoint_in_progress); - __ bind(safepoint_in_progress_done); -@@ -2439,7 +2439,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t - __ bind(noException); - - Label no_adjust, bail; -- if (!cause_return) { -+ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { - // If our stashed return pc was modified by the runtime we avoid touching it - __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); - __ bne(x18, t0, no_adjust); -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index 76ae6f89e27..2d4baab2ab7 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -1143,7 +1143,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { - // - // This is to avoid a race when we're in a native->Java transition - // racing the code which wakes up from a safepoint. 
-- __ safepoint_poll(L, true /* at_return */, true /* acquire */, false /* in_nmethod */); -+ __ safepoint_poll_acquire(L); - __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); - __ beqz(t1, Continue); - __ bind(L); - -From 13faeae35312c59a1366d4f9c84da7157f06efc7 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 11 Apr 2023 22:15:14 +0800 -Subject: [PATCH 039/140] Revert 8253180: ZGC: Implementation of JEP 376: ZGC: - Concurrent Thread-Stack Processing - ---- - src/hotspot/cpu/riscv/frame_riscv.cpp | 8 ++------ - src/hotspot/cpu/riscv/frame_riscv.hpp | 3 --- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1 - - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 8 -------- - .../cpu/riscv/templateInterpreterGenerator_riscv.cpp | 9 --------- - 5 files changed, 2 insertions(+), 27 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 8e4f20fe561..b056eb2488a 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -495,8 +495,8 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const { - } - - //------------------------------------------------------------------------------ --// frame::sender_raw --frame frame::sender_raw(RegisterMap* map) const { -+// frame::sender -+frame frame::sender(RegisterMap* map) const { - // Default is we done have to follow them. The sender_for_xxx will - // update it accordingly - assert(map != NULL, "map must be set"); -@@ -521,10 +521,6 @@ frame frame::sender_raw(RegisterMap* map) const { - return frame(sender_sp(), link(), sender_pc()); - } - --frame frame::sender(RegisterMap* map) const { -- return sender_raw(map); --} -- - bool frame::is_interpreted_frame_valid(JavaThread* thread) const { - assert(is_interpreted_frame(), "Not an interpreted frame"); - // These are reasonable sanity checks -diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp -index c06aaa9e391..3b88f6d5a1a 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.hpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.hpp -@@ -196,7 +196,4 @@ - - static jint interpreter_frame_expression_stack_direction() { return -1; } - -- // returns the sending frame, without applying any barriers -- frame sender_raw(RegisterMap* map) const; -- - #endif // CPU_RISCV_FRAME_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 74dded77d19..4e642af87c4 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -571,7 +571,6 @@ void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { - - // remove activation - // --// Apply stack watermark barrier. - // Unlock the receiver if this is a synchronized method. - // Unlock any Java monitors from syncronized blocks. - // Remove the activation from the stack. -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index c501c8f7bac..d740c99c979 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1565,14 +1565,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - - // check for safepoint operation in progress and/or pending suspend requests - { -- // We need an acquire here to ensure that any subsequent load of the -- // global SafepointSynchronize::_state flag is ordered after this load -- // of the thread-local polling word. 
We don't want this poll to -- // return false (i.e. not safepointing) and a later poll of the global -- // SafepointSynchronize::_state spuriously to return true. -- // This is to avoid a race when we're in a native->Java transition -- // racing the code which wakes up from a safepoint. -- - __ safepoint_poll_acquire(safepoint_in_progress); - __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); - __ bnez(t0, safepoint_in_progress); -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index 2d4baab2ab7..a07dea35b73 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -1134,15 +1134,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { - // check for safepoint operation in progress and/or pending suspend requests - { - Label L, Continue; -- -- // We need an acquire here to ensure that any subsequent load of the -- // global SafepointSynchronize::_state flag is ordered after this load -- // of the thread-local polling word. We don't want this poll to -- // return false (i.e. not safepointing) and a later poll of the global -- // SafepointSynchronize::_state spuriously to return true. -- // -- // This is to avoid a race when we're in a native->Java transition -- // racing the code which wakes up from a safepoint. - __ safepoint_poll_acquire(L); - __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); - __ beqz(t1, Continue); - -From 99ca43f1e7e74f161b40466f49fc61aa734d334d Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Wed, 12 Apr 2023 12:35:33 +0800 -Subject: [PATCH 040/140] JDK-8243155: AArch64: Add support for SqrtVF - ---- - src/hotspot/cpu/riscv/riscv.ad | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 2dde4453dac..9da8a76c190 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -7206,7 +7206,7 @@ instruct absD_reg(fRegD dst, fRegD src) %{ - %} - - instruct sqrtF_reg(fRegF dst, fRegF src) %{ -- match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); -+ match(Set dst (SqrtF src)); - - ins_cost(FSQRT_COST); - format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} - -From 4bbd814dfbc33d3f1277dbb64f19a18f9f8c1a81 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Wed, 12 Apr 2023 15:11:49 +0800 -Subject: [PATCH 041/140] Revert JDK-8267098: AArch64: C1 StubFrames end - confusingly - ---- - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 52 ++++++++++----------- - 1 file changed, 24 insertions(+), 28 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -index f523c9ed50a..1f58bde4df5 100644 ---- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -167,19 +167,14 @@ int StubAssembler::call_RT(Register oop_result, Register metadata_result, addres - return call_RT(oop_result, metadata_result, entry, arg_num); - } - --enum return_state_t { -- does_not_return, requires_return --}; -- - // Implementation of StubFrame - - class StubFrame: public StackObj { - private: - StubAssembler* _sasm; -- bool _return_state; - - public: -- StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state=requires_return); -+ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); - void load_argument(int 
offset_in_words, Register reg); - - ~StubFrame(); -@@ -197,9 +192,8 @@ void StubAssembler::epilogue() { - - #define __ _sasm-> - --StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state) { -+StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { - _sasm = sasm; -- _return_state = return_state; - __ prologue(name, must_gc_arguments); - } - -@@ -211,11 +205,7 @@ void StubFrame::load_argument(int offset_in_words, Register reg) { - - - StubFrame::~StubFrame() { -- if (_return_state == requires_return) { -- __ epilogue(); -- } else { -- __ should_not_reach_here(); -- } -+ __ epilogue(); - _sasm = NULL; - } - -@@ -378,6 +368,7 @@ OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address targe - assert_cond(oop_maps != NULL); - oop_maps->add_gc_map(call_offset, oop_map); - -+ __ should_not_reach_here(); - return oop_maps; - } - -@@ -425,7 +416,9 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { - sasm->set_frame_size(frame_size); - break; - } -- default: ShouldNotReachHere(); -+ default: -+ __ should_not_reach_here(); -+ break; - } - - // verify that only x10 and x13 are valid at this time -@@ -481,6 +474,9 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { - restore_live_registers(sasm, id != handle_exception_nofpu_id); - break; - case handle_exception_from_callee_id: -+ // Pop the return address. -+ __ leave(); -+ __ ret(); // jump to exception handler - break; - default: ShouldNotReachHere(); - } -@@ -641,13 +637,13 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case throw_div0_exception_id: - { -- StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); - oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); - } - break; - - case throw_null_pointer_exception_id: -- { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); -+ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); - oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); - } - break; -@@ -926,14 +922,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case throw_class_cast_exception_id: - { -- StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); - oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); - } - break; - - case throw_incompatible_class_change_error_id: - { -- StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); - oop_maps = generate_exception_throw(sasm, - CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); - } -@@ -1027,7 +1023,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case deoptimize_id: - { -- StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "deoptimize", dont_gc_arguments); - OopMap* oop_map = save_live_registers(sasm); - assert_cond(oop_map != NULL); - f.load_argument(0, c_rarg1); -@@ -1046,7 +1042,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - 
- case throw_range_check_failed_id: - { -- StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "range_check_failed", dont_gc_arguments); - oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); - } - break; -@@ -1062,7 +1058,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case access_field_patching_id: - { -- StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "access_field_patching", dont_gc_arguments); - // we should set up register map - oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); - } -@@ -1070,7 +1066,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case load_klass_patching_id: - { -- StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); - // we should set up register map - oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); - } -@@ -1078,7 +1074,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case load_mirror_patching_id: - { -- StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); - // we should set up register map - oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); - } -@@ -1086,7 +1082,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case load_appendix_patching_id: - { -- StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); - // we should set up register map - oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); - } -@@ -1109,14 +1105,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case throw_index_exception_id: - { -- StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); - oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); - } - break; - - case throw_array_store_exception_id: - { -- StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); - // tos + 0: link - // + 1: return address - oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); -@@ -1125,7 +1121,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case predicate_failed_trap_id: - { -- StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); - - OopMap* map = save_live_registers(sasm); - assert_cond(map != NULL); -@@ -1156,7 +1152,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - default: - { -- StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); - __ li(x10, (int) id); - __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10); - __ should_not_reach_here(); - -From eb37cfd42e7801c5ce64666c3cd25d40cfb22e76 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: 
Wed, 12 Apr 2023 18:06:40 +0800 -Subject: [PATCH 042/140] Revert JDK-8247691: [aarch64] Incorrect handling of - VM exceptions in C1 deopt stub/traps - ---- - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 87 +++++++++++++++------ - 1 file changed, 65 insertions(+), 22 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -index 1f58bde4df5..1f45fba9de0 100644 ---- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -581,37 +581,80 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { - #endif - __ reset_last_Java_frame(true); - --#ifdef ASSERT -- // Check that fields in JavaThread for exception oop and issuing pc are empty -- Label oop_empty; -- __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -- __ beqz(t0, oop_empty); -- __ stop("exception oop must be empty"); -- __ bind(oop_empty); -+ // check for pending exceptions -+ { Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, L); -+ // exception pending => remove activation and forward to exception handler - -- Label pc_empty; -- __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); -- __ beqz(t0, pc_empty); -- __ stop("exception pc must be empty"); -- __ bind(pc_empty); -+ { Label L1; -+ __ bnez(x10, L1); // have we deoptimized? -+ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); -+ __ bind(L1); -+ } -+ -+ // the deopt blob expects exceptions in the special fields of -+ // JavaThread, so copy and clear pending exception. -+ -+ // load and clear pending exception -+ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); -+ -+ // check that there is really a valid exception -+ __ verify_not_null_oop(x10); -+ -+ // load throwing pc: this is the return address of the stub -+ __ ld(x13, Address(fp, wordSize)); -+ -+#ifdef ASSERT -+ // Check that fields in JavaThread for exception oop and issuing pc are empty -+ Label oop_empty; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, oop_empty); -+ __ stop("exception oop must be empty"); -+ __ bind(oop_empty); -+ -+ Label pc_empty; -+ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); -+ __ beqz(t0, pc_empty); -+ __ stop("exception pc must be empty"); -+ __ bind(pc_empty); - #endif - -- // Runtime will return true if the nmethod has been deoptimized, this is the -- // expected scenario and anything else is an error. Note that we maintain a -- // check on the result purely as a defensive measure. -- Label no_deopt; -- __ beqz(x10, no_deopt); // Have we deoptimized? -+ // store exception oop and throwing pc to JavaThread -+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ -+ restore_live_registers(sasm); - -- // Perform a re-execute. The proper return address is already on the stack, -- // we just need to restore registers, pop all of our frames but the return -- // address and jump to the deopt blob. -+ __ leave(); -+ -+ // Forward the exception directly to deopt blob. We can blow no -+ // registers and must leave throwing pc on the stack. A patch may -+ // have values live in registers so the entry point with the -+ // exception in tls. 
-+ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); -+ -+ __ bind(L); -+ } -+ -+ // Runtime will return true if the nmethod has been deoptimized during -+ // the patching process. In that case we must do a deopt reexecute instead. -+ Label cont; -+ -+ __ beqz(x10, cont); // have we deoptimized? -+ -+ // Will reexecute. Proper return address is already on the stack we just restore -+ // registers, pop all of our frame but the return address and jump to the deopt blob - - restore_live_registers(sasm); - __ leave(); - __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); - -- __ bind(no_deopt); -- __ stop("deopt not performed"); -+ __ bind(cont); -+ restore_live_registers(sasm); -+ __ leave(); -+ __ ret(); - - return oop_maps; - } - -From 3fa279b459fffd1bd1ce158a7fdaa9d8704450a8 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 13 Apr 2023 18:29:27 +0800 -Subject: [PATCH 043/140] Revert JDK-8212681: Refactor IC locking to use a fine - grained CompiledICLocker - ---- - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 3 +-- - 2 files changed, 2 insertions(+), 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -index 75bc4be7840..4d1687301fc 100644 ---- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -113,10 +113,10 @@ void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, ad - } - - void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { -+ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); - // Reset stub. - address stub = static_stub->addr(); - assert(stub != NULL, "stub not found"); -- assert(CompiledICLocker::is_safe(stub), "mt unsafe call"); - // Creation also verifies the object. - NativeMovConstReg* method_holder - = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -index 0a05c577860..459683735e9 100644 ---- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -@@ -146,8 +146,7 @@ address NativeCall::destination() const { - // during code generation, where no patching lock is needed. - void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { - assert(!assert_lock || -- (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) || -- CompiledICLocker::is_safe(addr_at(0)), -+ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), - "concurrent code patching"); - - ResourceMark rm; - -From 727f1a8f9b4a6dfbb0cf2002f12b86b5d5f23362 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 13 Apr 2023 18:36:11 +0800 -Subject: [PATCH 044/140] Revert JDK-8225681: - vmTestbase/nsk/jvmti/RedefineClasses/StressRedefine fails due a) MT-unsafe - modification of inline cache - ---- - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -index 4d1687301fc..0b13e44c8d6 100644 ---- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -99,10 +99,15 @@ void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, ad - // Creation also verifies the object. 
- NativeMovConstReg* method_holder - = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); --#ifdef ASSERT -+#ifndef PRODUCT - NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); - -- verify_mt_safe(callee, entry, method_holder, jump); -+ // read the value once -+ volatile intptr_t data = method_holder->data(); -+ assert(data == 0 || data == (intptr_t)callee(), -+ "a) MT-unsafe modification of inline cache"); -+ assert(data == 0 || jump->jump_destination() == entry, -+ "b) MT-unsafe modification of inline cache"); - #endif - // Update stub. - method_holder->set_data((intptr_t)callee()); - -From 26e37551ecc41db0cf8eeb775a5501b4f45b4ffa Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 13 Apr 2023 18:39:52 +0800 -Subject: [PATCH 045/140] Revert JDK-8232046: AArch64 build failure after - JDK-8225681 - ---- - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 2 -- - src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 19 ++++--------------- - 2 files changed, 4 insertions(+), 17 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -index 0b13e44c8d6..1cfc92b28fa 100644 ---- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -126,8 +126,6 @@ void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_ - NativeMovConstReg* method_holder - = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); - method_holder->set_data(0); -- NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); -- jump->set_jump_destination((address)-1); - } - - //----------------------------------------------------------------------------- -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -index 459683735e9..bfe84fa4e30 100644 ---- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -@@ -272,15 +272,9 @@ address NativeJump::jump_destination() const { - - // We use jump to self as the unresolved address which the inline - // cache code (and relocs) know about -- // As a special case we also use sequence movptr_with_offset(r,0), jalr(r,0) -- // i.e. jump to 0 when we need leave space for a wide immediate -- // load -- -- // return -1 if jump to self or to 0 -- if ((dest == (address) this) || dest == 0) { -- dest = (address) -1; -- } - -+ // return -1 if jump to self -+ dest = (dest == (address) this) ? (address) -1 : dest; - return dest; - }; - -@@ -302,14 +296,9 @@ address NativeGeneralJump::jump_destination() const { - - // We use jump to self as the unresolved address which the inline - // cache code (and relocs) know about -- // As a special case we also use jump to 0 when first generating -- // a general jump -- -- // return -1 if jump to self or to 0 -- if ((dest == (address) this) || dest == 0) { -- dest = (address) -1; -- } - -+ // return -1 if jump to self -+ dest = (dest == (address) this) ? 
(address) -1 : dest; - return dest; - } - - -From 4fc68bc3cd13e623276965947d6c8cb14da15873 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 13 Apr 2023 18:47:08 +0800 -Subject: [PATCH 046/140] Revert JDK-8213084: Rework and enhance - Print[Opto]Assembly output - ---- - src/hotspot/cpu/riscv/assembler_riscv.hpp | 8 -------- - src/hotspot/cpu/riscv/disassembler_riscv.hpp | 20 -------------------- - 2 files changed, 28 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp -index 44e8d4b4ff1..b4e7287ce08 100644 ---- a/src/hotspot/cpu/riscv/assembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -268,14 +268,6 @@ class Assembler : public AbstractAssembler { - - enum { instruction_size = 4 }; - -- //---< calculate length of instruction >--- -- // We just use the values set above. -- // instruction must start at passed address -- static unsigned int instr_len(unsigned char *instr) { return instruction_size; } -- -- //---< longest instructions >--- -- static unsigned int instr_maxlen() { return instruction_size; } -- - enum RoundingMode { - rne = 0b000, // round to Nearest, ties to Even - rtz = 0b001, // round towards Zero -diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp -index b0e5560c906..06bca5298cd 100644 ---- a/src/hotspot/cpu/riscv/disassembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp -@@ -35,24 +35,4 @@ static const char* pd_cpu_opts() { - return ""; - } - --// Returns address of n-th instruction preceding addr, --// NULL if no preceding instruction can be found. --// On riscv, we assume a constant instruction length. --// It might be beneficial to check "is_readable" as we do on ppc and s390. --static address find_prev_instr(address addr, int n_instr) { -- return addr - Assembler::instruction_size * n_instr; --} -- --// special-case instruction decoding. --// There may be cases where the binutils disassembler doesn't do --// the perfect job. In those cases, decode_instruction0 may kick in --// and do it right. --// If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" --static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { -- return here; --} -- --// platform-specific instruction annotations (like value of loaded constants) --static void annotate(address pc, outputStream* st) {} -- - #endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP - -From f660c594eccb174c9779ebdc9ba40fe579aa50cc Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 13 Apr 2023 19:44:28 +0800 -Subject: [PATCH 047/140] Revert JDK-8241909: Remove useless code cache lookup - in frame::patch_pc - ---- - src/hotspot/cpu/riscv/frame_riscv.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index b056eb2488a..d03adc0bff4 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -270,7 +270,6 @@ bool frame::safe_for_sender(JavaThread *thread) { - } - - void frame::patch_pc(Thread* thread, address pc) { -- assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); - address* pc_addr = &(((address*) sp())[-1]); - if (TracePcPatching) { - tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", -@@ -280,6 +279,7 @@ void frame::patch_pc(Thread* thread, address pc) { - // patch in the same address that's already there. 
- assert(_pc == *pc_addr || pc == *pc_addr, "must be"); - *pc_addr = pc; -+ _cb = CodeCache::find_blob(pc); - address original_pc = CompiledMethod::get_deopt_original_pc(this); - if (original_pc != NULL) { - assert(original_pc == _pc, "expected original PC to be stored before patching"); - -From 0d1ed436d9b70c9244c5de42fb492bbfa5e785e8 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 16 Apr 2023 21:10:06 +0800 -Subject: [PATCH 048/140] Revert JDK-8277411: C2 fast_unlock intrinsic on - AArch64 has unnecessary ownership check & JDK-8277180: Intrinsify recursive - ObjectMonitor locking for C2 x64 and A64 - ---- - src/hotspot/cpu/riscv/riscv.ad | 24 ++++-------------------- - 1 file changed, 4 insertions(+), 20 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 9da8a76c190..c0fbda4f3f9 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2204,16 +2204,6 @@ encode %{ - __ mv(tmp, (address)markOopDesc::unused_mark()); - __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); - -- __ beqz(flag, cont); // CAS success means locking succeeded -- -- __ bne(flag, xthread, cont); // Check for recursive locking -- -- // Recursive lock case -- __ mv(flag, zr); -- __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); -- __ add(tmp, tmp, 1u); -- __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); -- - __ bind(cont); - %} - -@@ -2257,18 +2247,12 @@ encode %{ - __ bind(object_has_monitor); - STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); - __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor -+ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); - __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -+ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. -+ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions -+ __ bnez(flag, cont); - -- Label notRecursive; -- __ beqz(disp_hdr, notRecursive); // Will be 0 if not recursive. -- -- // Recursive lock -- __ addi(disp_hdr, disp_hdr, -1); -- __ sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -- __ mv(flag, zr); -- __ j(cont); -- -- __ bind(notRecursive); - __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); - __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); - __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. - -From cac7117dfc03023a81030e274944921df07bbead Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 16 Apr 2023 21:13:21 +0800 -Subject: [PATCH 049/140] Revert JDK-8210381: Obsolete EmitSync - ---- - src/hotspot/cpu/riscv/riscv.ad | 100 ++++++++++++++++++++------------- - 1 file changed, 60 insertions(+), 40 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index c0fbda4f3f9..c3ef648b21d 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2150,9 +2150,17 @@ encode %{ - // Load markWord from object into displaced_header. - __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); - -+ // Always do locking in runtime. 
-+ if (EmitSync & 0x01) { -+ __ mv(flag, 1); -+ return; -+ } -+ - // Check for existing monitor -- __ andi(t0, disp_hdr, markOopDesc::monitor_value); -- __ bnez(t0, object_has_monitor); -+ if ((EmitSync & 0x02) == 0) { -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); -+ __ bnez(t0, object_has_monitor); -+ } - - // Set tmp to be (markWord of object | UNLOCK_VALUE). - __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); -@@ -2185,24 +2193,26 @@ encode %{ - __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); - __ mv(flag, tmp); // we can use the value of tmp as the result here - -- __ j(cont); -- -- // Handle existing monitor. -- __ bind(object_has_monitor); -- // The object's monitor m is unlocked iff m->owner == NULL, -- // otherwise m->owner may contain a thread or a stack address. -- // -- // Try to CAS m->owner from NULL to current thread. -- __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); -- __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, -- Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) -- -- // Store a non-null value into the box to avoid looking like a re-entrant -- // lock. The fast-path monitor unlock code checks for -- // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the -- // relevant bit set, and also matches ObjectSynchronizer::slow_enter. -- __ mv(tmp, (address)markOopDesc::unused_mark()); -- __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ if ((EmitSync & 0x02) == 0) { -+ __ j(cont); -+ -+ // Handle existing monitor. -+ __ bind(object_has_monitor); -+ // The object's monitor m is unlocked iff m->owner == NULL, -+ // otherwise m->owner may contain a thread or a stack address. -+ // -+ // Try to CAS m->owner from NULL to current thread. -+ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); -+ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) -+ -+ // Store a non-null value into the box to avoid looking like a re-entrant -+ // lock. The fast-path monitor unlock code checks for -+ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the -+ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. -+ __ mv(tmp, (address)markOopDesc::unused_mark()); -+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ } - - __ bind(cont); - %} -@@ -2220,6 +2230,12 @@ encode %{ - - assert_different_registers(oop, box, tmp, disp_hdr, flag); - -+ // Always do locking in runtime. -+ if (EmitSync & 0x01) { -+ __ mv(flag, 1); -+ return; -+ } -+ - // Find the lock address and load the displaced header from the stack. - __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); - -@@ -2228,9 +2244,11 @@ encode %{ - __ beqz(disp_hdr, cont); - - // Handle existing monitor. 
-- __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); -- __ andi(t0, disp_hdr, markOopDesc::monitor_value); -- __ bnez(t0, object_has_monitor); -+ if ((EmitSync & 0x02) == 0) { -+ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); -+ __ bnez(t0, object_has_monitor); -+ } - - // Check if it is still a light weight lock, this is true if we - // see the stack address of the basicLock in the markWord of the -@@ -2244,23 +2262,25 @@ encode %{ - assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); - - // Handle existing monitor. -- __ bind(object_has_monitor); -- STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); -- __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor -- __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -- __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -- __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. -- __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions -- __ bnez(flag, cont); -- -- __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); -- __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); -- __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. -- __ bnez(flag, cont); -- // need a release store here -- __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -- __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -- __ sd(zr, Address(tmp)); // set unowned -+ if ((EmitSync & 0x02) == 0) { -+ __ bind(object_has_monitor); -+ STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); -+ __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor -+ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -+ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. -+ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions -+ __ bnez(flag, cont); -+ -+ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); -+ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. 
-+ __ bnez(flag, cont); -+ // need a release store here -+ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sd(zr, Address(tmp)); // set unowned -+ } - - __ bind(cont); - %} - -From ca7ab86ee886233651e1a79faff631fd7e226d57 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 16 Apr 2023 22:07:21 +0800 -Subject: [PATCH 050/140] Revert JDK-8256425: Obsolete Biased Locking in JDK 18 - ---- - src/hotspot/cpu/riscv/assembler_riscv.hpp | 2 + - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 6 +- - .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 7 +- - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 35 ++- - .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 3 +- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 27 ++- - .../cpu/riscv/macroAssembler_riscv.cpp | 217 ++++++++++++++++++ - .../cpu/riscv/macroAssembler_riscv.hpp | 28 +++ - src/hotspot/cpu/riscv/riscv.ad | 12 + - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 8 + - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 8 +- - 11 files changed, 341 insertions(+), 12 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp -index b4e7287ce08..51aa052a0c7 100644 ---- a/src/hotspot/cpu/riscv/assembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -3043,4 +3043,6 @@ enum Nf { - virtual ~Assembler() {} - }; - -+class BiasedLockingCounters; -+ - #endif // CPU_RISCV_ASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 46a20a64194..6a961ee2307 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -1511,9 +1511,13 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { - if (!UseFastLocking) { - __ j(*op->stub()->entry()); - } else if (op->code() == lir_lock) { -+ Register scratch = noreg; -+ if (UseBiasedLocking) { -+ scratch = op->scratch_opr()->as_register(); -+ } - assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); - // add debug info for NullPointerException only if one is possible -- int null_check_offset = __ lock_object(hdr, obj, lock, *op->stub()->entry()); -+ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); - if (op->info() != NULL) { - add_debug_info_for_null_check(null_check_offset, op->info()); - } -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index e126f148cdf..c45a75b2301 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -277,6 +277,11 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { - - // "lock" stores the address of the monitor stack slot, so this is not an oop - LIR_Opr lock = new_register(T_INT); -+ // Need a scratch register for biased locking -+ LIR_Opr scratch = LIR_OprFact::illegalOpr; -+ if (UseBiasedLocking) { -+ scratch = new_register(T_INT); -+ } - - CodeEmitInfo* info_for_exception = NULL; - if (x->needs_null_check()) { -@@ -285,7 +290,7 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { - // this CodeEmitInfo must not have the xhandlers because here the - // object is already locked (xhandlers expect object to be unlocked) - CodeEmitInfo* info = state_for(x, x->state(), true); -- monitor_enter(obj.result(), lock, syncTempOpr(), LIR_OprFact::illegalOpr, -+ monitor_enter(obj.result(), lock, 
syncTempOpr(), scratch, - x->monitor_no(), info_for_exception, info); - } - -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index 2d52343587e..e486f41948e 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -35,6 +35,7 @@ - #include "oops/arrayOop.hpp" - #include "oops/markWord.hpp" - #include "runtime/basicLock.hpp" -+#include "runtime/biasedLocking.hpp" - #include "runtime/os.hpp" - #include "runtime/sharedRuntime.hpp" - #include "runtime/stubRoutines.hpp" -@@ -50,7 +51,7 @@ void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, - } - } - --int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { -+int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { - const int aligned_mask = BytesPerWord - 1; - const int hdr_offset = oopDesc::mark_offset_in_bytes(); - assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); -@@ -62,7 +63,12 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr - // save object being locked into the BasicObjectLock - sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); - -- null_check_offset = offset(); -+ if (UseBiasedLocking) { -+ assert(scratch != noreg, "should have scratch register at this point"); -+ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); -+ } else { -+ null_check_offset = offset(); -+ } - - // Load object header - ld(hdr, Address(obj, hdr_offset)); -@@ -98,6 +104,10 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr - // otherwise we don't care about the result and handle locking via runtime call - bnez(hdr, slow_case, /* is_far */ true); - bind(done); -+ if (PrintBiasedLockingStatistics) { -+ la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); -+ add_memory_int32(Address(t1, 0), 1); -+ } - return null_check_offset; - } - -@@ -107,13 +117,21 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ - assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); - Label done; - -+ if (UseBiasedLocking) { -+ // load object -+ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); -+ biased_locking_exit(obj, hdr, done); -+ } -+ - // load displaced header - ld(hdr, Address(disp_hdr, 0)); - // if the loaded hdr is NULL we had recursive locking - // if we had recursive locking, we are done - beqz(hdr, done); -- // load object -- ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); -+ if (!UseBiasedLocking) { -+ // load object -+ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); -+ } - verify_oop(obj); - // test if object header is pointing to the displaced header, and if so, restore - // the displaced header in the object - if the object header is not pointing to -@@ -140,8 +158,13 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i - - void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { - assert_different_registers(obj, klass, len); -- // This assumes that all prototype bits fitr in an int32_t -- mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); -+ if (UseBiasedLocking && !len->is_valid()) { -+ 
assert_different_registers(obj, klass, len, tmp1, tmp2); -+ ld(tmp1, Address(klass, Klass::prototype_header_offset())); -+ } else { -+ // This assumes that all prototype bits fitr in an int32_t -+ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); -+ } - sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); - - if (UseCompressedClassPointers) { // Take care not to kill klass -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp -index dfd3c17d7c7..1950cee5dd5 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp -@@ -59,8 +59,9 @@ using MacroAssembler::null_check; - // hdr : must be x10, contents destroyed - // obj : must point to the object to lock, contents preserved - // disp_hdr: must point to the displaced header location, contents preserved -+ // scratch : scratch register, contents destroyed - // returns code offset at which to add null check debug information -- int lock_object (Register swap, Register obj, Register disp_hdr, Label& slow_case); -+ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); - - // unlocking - // hdr : contents destroyed -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 4e642af87c4..f0c249f0d26 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -39,6 +39,7 @@ - #include "prims/jvmtiExport.hpp" - #include "prims/jvmtiThreadState.hpp" - #include "runtime/basicLock.hpp" -+#include "runtime/biasedLocking.hpp" - #include "runtime/frame.inline.hpp" - #include "runtime/safepointMechanism.hpp" - #include "runtime/sharedRuntime.hpp" -@@ -782,6 +783,10 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) - // Load object pointer into obj_reg c_rarg3 - ld(obj_reg, Address(lock_reg, obj_offset)); - -+ if (UseBiasedLocking) { -+ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); -+ } -+ - // Load (object->mark() | 1) into swap_reg - ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - ori(swap_reg, t0, 1); -@@ -792,7 +797,17 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) - assert(lock_offset == 0, - "displached header must be first word in BasicObjectLock"); - -- cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); -+ if (PrintBiasedLockingStatistics) { -+ Label fail, fast; -+ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail); -+ bind(fast); -+ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), -+ t1, t0); -+ j(done); -+ bind(fail); -+ } else { -+ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); -+ } - - // Test if the oopMark is an obvious stack pointer, i.e., - // 1) (mark & 7) == 0, and -@@ -809,6 +824,12 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) - - // Save the test result, for recursive case, the result is zero - sd(swap_reg, Address(lock_reg, mark_offset)); -+ -+ if (PrintBiasedLockingStatistics) { -+ bnez(swap_reg, slow_case); -+ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), -+ t1, t0); -+ } - beqz(swap_reg, done); - - bind(slow_case); -@@ -861,6 +882,10 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) - // Free entry - sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); - -+ if (UseBiasedLocking) { -+ 
biased_locking_exit(obj_reg, header_reg, done); -+ } -+ - // Load the old header from BasicLock structure - ld(header_reg, Address(swap_reg, - BasicLock::displaced_header_offset_in_bytes())); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 73629e3dba3..e557a134b5b 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -41,6 +41,7 @@ - #include "oops/compressedOops.inline.hpp" - #include "oops/klass.inline.hpp" - #include "oops/oop.hpp" -+#include "runtime/biasedLocking.hpp" - #include "runtime/interfaceSupport.inline.hpp" - #include "runtime/jniHandles.inline.hpp" - #include "runtime/sharedRuntime.hpp" -@@ -2791,6 +2792,222 @@ void MacroAssembler::reserved_stack_check() { - bind(no_reserved_zone_enabling); - } - -+void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { -+ Label retry_load; -+ bind(retry_load); -+ // flush and load exclusive from the memory location -+ lr_w(tmp, counter_addr); -+ addw(tmp, tmp, 1); -+ // if we store+flush with no intervening write tmp wil be zero -+ sc_w(tmp, tmp, counter_addr); -+ bnez(tmp, retry_load); -+} -+ -+void MacroAssembler::load_prototype_header(Register dst, Register src) { -+ load_klass(dst, src); -+ ld(dst, Address(dst, Klass::prototype_header_offset())); -+} -+ -+int MacroAssembler::biased_locking_enter(Register lock_reg, -+ Register obj_reg, -+ Register swap_reg, -+ Register tmp_reg, -+ bool swap_reg_contains_mark, -+ Label& done, -+ Label* slow_case, -+ BiasedLockingCounters* counters, -+ Register flag) { -+ assert(UseBiasedLocking, "why call this otherwise?"); -+ assert_different_registers(lock_reg, obj_reg, swap_reg); -+ -+ if (PrintBiasedLockingStatistics && counters == NULL) -+ counters = BiasedLocking::counters(); -+ -+ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0); -+ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); -+ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); -+ -+ // Biased locking -+ // See whether the lock is currently biased toward our thread and -+ // whether the epoch is still valid -+ // Note that the runtime guarantees sufficient alignment of JavaThread -+ // pointers to allow age to be placed into low bits -+ // First check to see whether biasing is even enabled for this object -+ Label cas_label; -+ int null_check_offset = -1; -+ if (!swap_reg_contains_mark) { -+ null_check_offset = offset(); -+ ld(swap_reg, mark_addr); -+ } -+ andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); -+ li(t0, markOopDesc::biased_lock_pattern); -+ bne(t0, tmp_reg, cas_label); -+ // The bias pattern is present in the object's header. Need to check -+ // whether the bias owner and the epoch are both still current. -+ load_prototype_header(tmp_reg, obj_reg); -+ orr(tmp_reg, tmp_reg, xthread); -+ xorr(tmp_reg, swap_reg, tmp_reg); -+ andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); -+ if (flag->is_valid()) { -+ mv(flag, tmp_reg); -+ } -+ if (counters != NULL) { -+ Label around; -+ bnez(tmp_reg, around); -+ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); -+ j(done); -+ bind(around); -+ } else { -+ beqz(tmp_reg, done); -+ } -+ -+ Label try_revoke_bias; -+ Label try_rebias; -+ -+ // At this point we know that the header has the bias pattern and -+ // that we are not the bias owner in the current epoch. 
We need to -+ // figure out more details about the state of the header in order to -+ // know what operations can be legally performed on the object's -+ // header. -+ -+ // If the low three bits in the xor result aren't clear, that means -+ // the prototype header is no longer biased and we have to revoke -+ // the bias on this object. -+ andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); -+ bnez(t0, try_revoke_bias); -+ -+ // Biasing is still enabled for this data type. See whether the -+ // epoch of the current bias is still valid, meaning that the epoch -+ // bits of the mark word are equal to the epoch bits of the -+ // prototype header. (Note that the prototype header's epoch bits -+ // only change at a safepoint.) If not, attempt to rebias the object -+ // toward the current thread. Note that we must be absolutely sure -+ // that the current epoch is invalid in order to do this because -+ // otherwise the manipulations it performs on the mark word are -+ // illegal. -+ andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); -+ bnez(t0, try_rebias); -+ -+ // The epoch of the current bias is still valid but we know nothing -+ // about the owner; it might be set or it might be clear. Try to -+ // acquire the bias of the object using an atomic operation. If this -+ // fails we will go in to the runtime to revoke the object's bias. -+ // Note that we first construct the presumed unbiased header so we -+ // don't accidentally blow away another thread's valid bias. -+ { -+ Label cas_success; -+ Label counter; -+ mv(t0, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); -+ andr(swap_reg, swap_reg, t0); -+ orr(tmp_reg, swap_reg, xthread); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); -+ // cas failed here if slow_cass == NULL -+ if (flag->is_valid()) { -+ mv(flag, 1); -+ j(counter); -+ } -+ // If the biasing toward our thread failed, this means that -+ // another thread succeeded in biasing it toward itself and we -+ // need to revoke that bias. The revocation will occur in the -+ // interpreter runtime in the slow case. -+ bind(cas_success); -+ if (flag->is_valid()) { -+ mv(flag, 0); -+ bind(counter); -+ } -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), -+ tmp_reg, t0); -+ } -+ } -+ j(done); -+ -+ bind(try_rebias); -+ // At this point we know the epoch has expired, meaning that the -+ // current "bias owner", if any, is actually invalid. Under these -+ // circumstances _only_, we are allowed to use the current header's -+ // value as the comparison value when doing the cas to acquire the -+ // bias in the current epoch. In other words, we allow transfer of -+ // the bias from one thread to another directly in this situation. -+ // -+ // FIXME: due to a lack of registers we currently blow away the age -+ // bits in this situation. Should attempt to preserve them. -+ { -+ Label cas_success; -+ Label counter; -+ load_prototype_header(tmp_reg, obj_reg); -+ orr(tmp_reg, xthread, tmp_reg); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); -+ // cas failed here if slow_cass == NULL -+ if (flag->is_valid()) { -+ mv(flag, 1); -+ j(counter); -+ } -+ -+ // If the biasing toward our thread failed, then another thread -+ // succeeded in biasing it toward itself and we need to revoke that -+ // bias. The revocation will occur in the runtime in the slow case. 
-+ bind(cas_success); -+ if (flag->is_valid()) { -+ mv(flag, 0); -+ bind(counter); -+ } -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), -+ tmp_reg, t0); -+ } -+ } -+ j(done); -+ -+ bind(try_revoke_bias); -+ // The prototype mark in the klass doesn't have the bias bit set any -+ // more, indicating that objects of this data type are not supposed -+ // to be biased any more. We are going to try to reset the mark of -+ // this object to the prototype value and fall through to the -+ // CAS-based locking scheme. Note that if our CAS fails, it means -+ // that another thread raced us for the privilege of revoking the -+ // bias of this particular object, so it's okay to continue in the -+ // normal locking code. -+ // -+ // FIXME: due to a lack of registers we currently blow away the age -+ // bits in this situation. Should attempt to preserve them. -+ { -+ Label cas_success, nope; -+ load_prototype_header(tmp_reg, obj_reg); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); -+ bind(cas_success); -+ -+ // Fall through to the normal CAS-based lock, because no matter what -+ // the result of the above CAS, some thread must have succeeded in -+ // removing the bias bit from the object's header. -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, -+ t0); -+ } -+ bind(nope); -+ } -+ -+ bind(cas_label); -+ -+ return null_check_offset; -+} -+ -+void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { -+ assert(UseBiasedLocking, "why call this otherwise?"); -+ -+ // Check for biased locking unlock case, which is a no-op -+ // Note: we do not have to check the thread ID for two reasons. -+ // First, the interpreter checks for IllegalMonitorStateException at -+ // a higher level. Second, if the bias was revoked while we held the -+ // lock, the object could not be rebiased toward another thread, so -+ // the bias bit would be clear. -+ ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); -+ sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); -+ if (flag->is_valid()) { mv(flag, tmp_reg); } -+ beqz(tmp_reg, done); -+} -+ - // Move the address of the polling page into dest. - void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { - if (SafepointMechanism::uses_thread_local_poll()) { -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 8a2c6e07d88..c1ffa120774 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -47,6 +47,32 @@ class MacroAssembler: public Assembler { - void safepoint_poll(Label& slow_path); - void safepoint_poll_acquire(Label& slow_path); - -+ // Biased locking support -+ // lock_reg and obj_reg must be loaded up with the appropriate values. -+ // swap_reg is killed. -+ // tmp_reg must be supplied and must not be rscratch1 or rscratch2 -+ // Optional slow case is for implementations (interpreter and C1) which branch to -+ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. -+ // Returns offset of first potentially-faulting instruction for null -+ // check info (currently consumed only by C1). 
If -+ // swap_reg_contains_mark is true then returns -1 as it is assumed -+ // the calling code has already passed any potential faults. -+ int biased_locking_enter(Register lock_reg, Register obj_reg, -+ Register swap_reg, Register tmp_reg, -+ bool swap_reg_contains_mark, -+ Label& done, Label* slow_case = NULL, -+ BiasedLockingCounters* counters = NULL, -+ Register flag = noreg); -+ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done, Register flag = noreg); -+ -+ // Helper functions for statistics gathering. -+ // Unconditional atomic increment. -+ void atomic_incw(Register counter_addr, Register tmp); -+ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { -+ la(tmp1, counter_addr); -+ atomic_incw(tmp1, tmp2); -+ } -+ - // Place a fence.i after code may have been modified due to a safepoint. - void safepoint_ifence(); - -@@ -225,6 +251,8 @@ class MacroAssembler: public Assembler { - // stored using routines that take a jobject. - void store_heap_oop_null(Address dst); - -+ void load_prototype_header(Register dst, Register src); -+ - // This dummy is to prevent a call to store_heap_oop from - // converting a zero (linke NULL) into a Register by giving - // the compiler two choices it can't resolve -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index c3ef648b21d..c2a0be140e9 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2156,6 +2156,10 @@ encode %{ - return; - } - -+ if (UseBiasedLocking && !UseOptoBiasInlining) { -+ __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag); -+ } -+ - // Check for existing monitor - if ((EmitSync & 0x02) == 0) { - __ andi(t0, disp_hdr, markOopDesc::monitor_value); -@@ -2236,6 +2240,10 @@ encode %{ - return; - } - -+ if (UseBiasedLocking && !UseOptoBiasInlining) { -+ __ biased_locking_exit(oop, tmp, cont, flag); -+ } -+ - // Find the lock address and load the displaced header from the stack. - __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); - -@@ -4961,6 +4969,10 @@ instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFla - ins_pipe(pipe_serial); - %} - -+// storeLConditional is used by PhaseMacroExpand::expand_lock_node -+// when attempting to rebias a lock towards the current thread. We -+// must use the acquire form of cmpxchg in order to guarantee acquire -+// semantics in this case. 
- instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) - %{ - match(Set cr (StoreLConditional mem (Binary oldval newval))); -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index d740c99c979..eaefcc2b595 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1489,6 +1489,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - // Load the oop from the handle - __ ld(obj_reg, Address(oop_handle_reg, 0)); - -+ if (UseBiasedLocking) { -+ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); -+ } -+ - // Load (object->mark() | 1) into swap_reg % x10 - __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - __ ori(swap_reg, t0, 1); -@@ -1597,6 +1601,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - - Label done; - -+ if (UseBiasedLocking) { -+ __ biased_locking_exit(obj_reg, old_hdr, done); -+ } -+ - // Simple recursive lock? - __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); - __ beqz(t0, done); -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index c9d399ccdaf..1e23fb4dc09 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -3563,9 +3563,13 @@ void TemplateTable::_new() { - __ bnez(x13, loop); - } - -- // initialize object hader only. -+ // initialize object header only. - __ bind(initialize_header); -- __ mv(t0, (intptr_t)markOopDesc::prototype()); -+ if (UseBiasedLocking) { -+ __ ld(t0, Address(x14, Klass::prototype_header_offset())); -+ } else { -+ __ mv(t0, (intptr_t)markOopDesc::prototype()); -+ } - __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); - __ store_klass_gap(x10, zr); // zero klass gap for compressed oops - __ store_klass(x10, x14); // store klass last - -From 864e551505bb816f3dc8a3bd1b065328ba7b5d65 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Mon, 17 Apr 2023 19:52:44 +0800 -Subject: [PATCH 051/140] Revert JDK-8227680: FastJNIAccessors: Check for JVMTI - field access event requests at runtime - ---- - .../cpu/riscv/jniFastGetField_riscv.cpp | 32 ++++--------------- - 1 file changed, 6 insertions(+), 26 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp -index 814ed23e471..f6e7351c4fc 100644 ---- a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp -+++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp -@@ -83,28 +83,10 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { - // An even value means there are no ongoing safepoint operations - __ andi(t0, rcounter, 1); - __ bnez(t0, slow); -- -- if (JvmtiExport::can_post_field_access()) { -- // Using barrier to order wrt. JVMTI check and load of result. -- __ membar(MacroAssembler::LoadLoad); -- -- // Check to see if a field access watch has been set before we -- // take the fast path. -- int32_t offset2; -- __ la_patchable(result, -- ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), -- offset2); -- __ lwu(result, Address(result, offset2)); -- __ bnez(result, slow); -- -- __ mv(robj, c_rarg1); -- } else { -- // Using address dependency to order wrt. load of result. 
-- __ xorr(robj, c_rarg1, rcounter); -- __ xorr(robj, robj, rcounter); // obj, since -- // robj ^ rcounter ^ rcounter == robj -- // robj is address dependent on rcounter. -- } -+ __ xorr(robj, c_rarg1, rcounter); -+ __ xorr(robj, robj, rcounter); // obj, since -+ // robj ^ rcounter ^ rcounter == robj -+ // robj is address dependent on rcounter. - - // Both robj and t0 are clobbered by try_resolve_jobject_in_native. - BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -@@ -137,10 +119,8 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { - default: ShouldNotReachHere(); - } - -- // Using acquire: Order JVMTI check and load of result wrt. succeeding check -- // (LoadStore for volatile field). -- __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -- -+ __ xorr(rcounter_addr, rcounter_addr, result); -+ __ xorr(rcounter_addr, rcounter_addr, result); - __ lw(t0, safepoint_counter_addr); - __ bne(rcounter, t0, slow); - - -From b822b64cb6be38cb7806fda3d56675674557c163 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 18 Apr 2023 16:34:32 +0800 -Subject: [PATCH 052/140] Revert JDK-8249768: Move static oops and - NullPointerException oops from Universe into OopStorage - ---- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index 1e23fb4dc09..fbcdcf60d9c 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -411,7 +411,6 @@ void TemplateTable::fast_aldc(bool wide) - int32_t offset = 0; - __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); - __ ld(tmp, Address(rarg, offset)); -- __ resolve_oop_handle(tmp); - __ bne(result, tmp, notNull); - __ mv(result, zr); // NULL object reference - __ bind(notNull); - -From c82c482aa065ffd39eab6b87a0ad6c6cbca1e3af Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 18 Apr 2023 16:58:23 +0800 -Subject: [PATCH 053/140] Revert JDK-8217998: Remove method_type field - associated with the appendix field of an indy or method handle call - ---- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index fbcdcf60d9c..158294f7436 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -3192,6 +3192,7 @@ void TemplateTable::prepare_invoke(int byte_no, - // since the parameter_size includes it. - __ push_reg(x9); - __ mv(x9, index); -+ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); - __ load_resolved_reference_at_index(index, x9); - __ pop_reg(x9); - __ push_reg(index); // push appendix (MethodType, CallSite, etc.) 
- -From 3e50d62dd06c3f8bc586e3ab2b00f2f587d950bf Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:04:31 +0800 -Subject: [PATCH 054/140] Revert JDK-8277372: Add getters for BOT and card - table members - ---- - src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 4 ++-- - .../riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp | 6 +++--- - 2 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -index 1c46b3947d3..6b75bf63781 100644 ---- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -215,7 +215,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, - ExternalAddress cardtable((address) ct->byte_map_base()); - const Register card_addr = tmp; - -- __ srli(card_addr, store_addr, CardTable::card_shift()); -+ __ srli(card_addr, store_addr, CardTable::card_shift); - - // get the address of the card - __ load_byte_map_base(tmp2); -@@ -437,7 +437,7 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* - assert_different_registers(card_offset, byte_map_base, t0); - - __ load_parameter(0, card_offset); -- __ srli(card_offset, card_offset, CardTable::card_shift()); -+ __ srli(card_offset, card_offset, CardTable::card_shift); - __ load_byte_map_base(byte_map_base); - - // Convert card offset into an address in card_addr -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -index a419f92b5f6..868d022ac74 100644 ---- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -@@ -41,7 +41,7 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob - BarrierSet* bs = BarrierSet::barrier_set(); - assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); - -- __ srli(obj, obj, CardTable::card_shift()); -+ __ srli(obj, obj, CardTable::card_shift); - - assert(CardTable::dirty_card_val() == 0, "must be"); - -@@ -74,8 +74,8 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl - __ shadd(end, count, start, count, LogBytesPerHeapOop); - __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive - -- __ srli(start, start, CardTable::card_shift()); -- __ srli(end, end, CardTable::card_shift()); -+ __ srli(start, start, CardTable::card_shift); -+ __ srli(end, end, CardTable::card_shift); - __ sub(count, end, start); // number of bytes to copy - - __ load_byte_map_base(tmp); - -From 6a81a820e6c08cfdd8e29a835e953dabffdca98a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Wed, 19 Apr 2023 11:30:58 +0800 -Subject: [PATCH 055/140] Revert JDK-8260941: Remove the conc_scan parameter - for CardTable - ---- - .../shared/cardTableBarrierSetAssembler_riscv.cpp | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -index 868d022ac74..a476e5ec84d 100644 ---- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -@@ -41,6 +41,9 @@ void 
CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob - BarrierSet* bs = BarrierSet::barrier_set(); - assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); - -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); -+ - __ srli(obj, obj, CardTable::card_shift); - - assert(CardTable::dirty_card_val() == 0, "must be"); -@@ -56,6 +59,9 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob - __ sb(zr, Address(tmp)); - __ bind(L_already_dirty); - } else { -+ if (ct->scanned_concurrently()) { -+ __ membar(MacroAssembler::StoreStore); -+ } - __ sb(zr, Address(tmp)); - } - } -@@ -66,6 +72,10 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl - assert_different_registers(start, tmp); - assert_different_registers(count, tmp); - -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); -+ - Label L_loop, L_done; - const Register end = count; - -@@ -80,6 +90,9 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl - - __ load_byte_map_base(tmp); - __ add(start, start, tmp); -+ if (ct->scanned_concurrently()) { -+ __ membar(MacroAssembler::StoreStore); -+ } - - __ bind(L_loop); - __ add(tmp, start, count); - -From 24688cb665b16331b491bed2566dc97582a3d73c Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Wed, 19 Apr 2023 11:32:54 +0800 -Subject: [PATCH 056/140] Revert JDK-8220301: Remove jbyte use in CardTable - -Note: An assertion in `CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier` is removed. See the jdk11u backport for AArch64: https://mail.openjdk.org/pipermail/jdk-updates-dev/2019-August/001746.html ---- - src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 3 +++ - .../cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp | 1 + - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/riscv.ad | 3 +-- - 4 files changed, 6 insertions(+), 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -index 6b75bf63781..b6786c6b327 100644 ---- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -196,6 +196,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, - BarrierSet* bs = BarrierSet::barrier_set(); - CardTableBarrierSet* ctbs = barrier_set_cast(bs); - CardTable* ct = ctbs->card_table(); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); - - Label done; - Label runtime; -@@ -213,6 +214,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, - // storing region crossing non-NULL, is card already dirty? 
- - ExternalAddress cardtable((address) ct->byte_map_base()); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); - const Register card_addr = tmp; - - __ srli(card_addr, store_addr, CardTable::card_shift); -@@ -419,6 +421,7 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* - BarrierSet* bs = BarrierSet::barrier_set(); - CardTableBarrierSet* ctbs = barrier_set_cast(bs); - CardTable* ct = ctbs->card_table(); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); - - Label done; - Label runtime; -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -index a476e5ec84d..81d47d61d4c 100644 ---- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -@@ -43,6 +43,7 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob - - CardTableBarrierSet* ctbs = barrier_set_cast(bs); - CardTable* ct = ctbs->card_table(); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); - - __ srli(obj, obj, CardTable::card_shift); - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index e557a134b5b..6e4d22db40f 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -2719,7 +2719,7 @@ void MacroAssembler::get_thread(Register thread) { - } - - void MacroAssembler::load_byte_map_base(Register reg) { -- CardTable::CardValue* byte_map_base = -+ jbyte *byte_map_base = - ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); - li(reg, (uint64_t)byte_map_base); - } -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index c2a0be140e9..ca6a232e1e0 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2735,8 +2735,7 @@ operand immByteMapBase() - %{ - // Get base of card map - predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && -- (CardTable::CardValue*)n->get_ptr() == -- ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); -+ (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); - match(ConP); - - op_cost(0); - -From 6ee27261d406342a5378d4a404319866a9bae804 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Wed, 19 Apr 2023 11:51:20 +0800 -Subject: [PATCH 057/140] Revert JDK-8230486: - G1BarrierSetAssembler::g1_write_barrier_post unnecessarily pushes/pops - new_val - ---- - src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -index b6786c6b327..d724876ec3a 100644 ---- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -250,7 +250,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, - - __ bind(runtime); - // save the live input values -- RegSet saved = RegSet::of(store_addr); -+ RegSet saved = RegSet::of(store_addr, new_val); - __ push_reg(saved, sp); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); - __ 
pop_reg(saved, sp); - -From 57067a358ffc1b54edfb305549bd460b0fca47f0 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Fri, 21 Apr 2023 12:10:22 +0800 -Subject: [PATCH 058/140] Revert JDK-8242449: AArch64: r27 can be allocated in - CompressedOops mode - ---- - src/hotspot/cpu/riscv/riscv.ad | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index ca6a232e1e0..e3f976faa0d 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -4846,6 +4846,8 @@ instruct storeN(iRegN src, memory mem) - instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) - %{ - match(Set mem (StoreN mem zero)); -+ predicate(Universe::narrow_oop_base() == NULL && -+ Universe::narrow_klass_base() == NULL); - - ins_cost(STORE_COST); - format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} - -From 0db520768d4d268a9dc641e301df45653c52f6eb Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 23 Apr 2023 14:59:09 +0800 -Subject: [PATCH 059/140] A fix for interpreter frame verification code, - skipping the locals check if there is no locals. See one of the additional - commits in JDK-8286301, the RISC-V loom port. - ---- - src/hotspot/cpu/riscv/frame_riscv.cpp | 11 ++++++++++- - 1 file changed, 10 insertions(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index d03adc0bff4..13c482b610a 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -571,7 +571,16 @@ bool frame::is_interpreted_frame_valid(JavaThread* thread) const { - - // validate locals - address locals = (address) *interpreter_frame_locals_addr(); -- if (locals > thread->stack_base() || locals < (address) fp()) { -+ if (locals > thread->stack_base()) { -+ return false; -+ } -+ -+ if (m->max_locals() > 0 && locals < (address) fp()) { -+ // fp in interpreter frame on RISC-V is higher than that on AArch64, -+ // pointing to sender_sp and sender_sp-2 relatively. -+ // On RISC-V, if max_locals is 0, the 'locals' pointer may be below fp, -+ // pointing to sender_sp-1 (with one padding slot). -+ // So we verify the 'locals' pointer only if max_locals > 0. 
- return false; - } - - -From 795da5afe59658b4d89cd8501b4f4ec56471b14c Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 11 Apr 2023 11:45:40 +0800 -Subject: [PATCH 060/140] ShenandoahGC adaptations on JDK11 for RISC-V backend - ---- - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 4 +- - .../c1/shenandoahBarrierSetC1_riscv.cpp | 2 +- - .../shenandoahBarrierSetAssembler_riscv.cpp | 229 +++++++++--------- - .../shenandoahBarrierSetAssembler_riscv.hpp | 15 +- - .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 88 ------- - src/hotspot/cpu/riscv/riscv.ad | 6 +- - .../templateInterpreterGenerator_riscv.cpp | 15 +- - 7 files changed, 146 insertions(+), 213 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 6a961ee2307..90c4af5d3b0 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -1817,10 +1817,12 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { - - - void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { -- if (patch_code != lir_patch_none) { -+#if INCLUDE_SHENANDOAHGC -+ if (UseShenandoahGC && patch_code != lir_patch_none) { - deoptimize_trap(info); - return; - } -+#endif - - assert(patch_code == lir_patch_none, "Patch code not supported"); - LIR_Address* adr = addr->as_address_ptr(); -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -index cd568cc723f..d19f5b859ce 100644 ---- a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -@@ -103,7 +103,7 @@ LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRIt - __ xchg(access.resolved_addr(), value_opr, result, tmp); - - if (access.is_oop()) { -- result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators()); -+ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0)); - LIR_Opr tmp_opr = gen->new_register(type); - __ move(result, tmp_opr); - result = tmp_opr; -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -index 84e1205bc25..b8534c52e77 100644 ---- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -@@ -27,7 +27,7 @@ - #include "gc/shenandoah/shenandoahBarrierSet.hpp" - #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" - #include "gc/shenandoah/shenandoahForwarding.hpp" --#include "gc/shenandoah/shenandoahHeap.inline.hpp" -+#include "gc/shenandoah/shenandoahHeap.hpp" - #include "gc/shenandoah/shenandoahHeapRegion.hpp" - #include "gc/shenandoah/shenandoahRuntime.hpp" - #include "gc/shenandoah/shenandoahThreadLocalData.hpp" -@@ -44,6 +44,8 @@ - - #define __ masm-> - -+address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; -+ - void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, - Register src, Register dst, Register count, RegSet saved_regs) { - if (is_oop) { -@@ -116,10 +118,10 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, - Address buffer(thread, 
in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); - - // Is marking active? -- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { -+ if (in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 4) { - __ lwu(tmp, in_progress); - } else { -- assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ assert(in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); - __ lbu(tmp, in_progress); - } - __ beqz(tmp, done); -@@ -225,37 +227,21 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb - __ pop_reg(saved_regs, sp); - } - --void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, -- Register dst, -- Address load_addr, -- DecoratorSet decorators) { -+void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, -+ Register dst, -+ Address load_addr) { - assert(ShenandoahLoadRefBarrier, "Should be enabled"); - assert(dst != t1 && load_addr.base() != t1, "need t1"); - assert_different_registers(load_addr.base(), t0, t1); - -- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); -- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); -- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); -- bool is_native = ShenandoahBarrierSet::is_native_access(decorators); -- bool is_narrow = UseCompressedOops && !is_native; -- -- Label heap_stable, not_cset; -+ Label done; - __ enter(); - Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); - __ lbu(t1, gc_state); - - // Check for heap stability -- if (is_strong) { -- __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); -- __ beqz(t1, heap_stable); -- } else { -- Label lrb; -- __ andi(t0, t1, ShenandoahHeap::WEAK_ROOTS); -- __ bnez(t0, lrb); -- __ andi(t0, t1, ShenandoahHeap::HAS_FORWARDED); -- __ beqz(t0, heap_stable); -- __ bind(lrb); -- } -+ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); -+ __ beqz(t1, done); - - // use x11 for load address - Register result_dst = dst; -@@ -270,43 +256,12 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, - __ la(x11, load_addr); - __ mv(x10, dst); - -- // Test for in-cset -- if (is_strong) { -- __ li(t1, (uint64_t)ShenandoahHeap::in_cset_fast_test_addr()); -- __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -- __ add(t1, t1, t0); -- __ lbu(t1, Address(t1)); -- __ andi(t0, t1, 1); -- __ beqz(t0, not_cset); -- } -+ __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); - -- __ push_call_clobbered_registers(); -- if (is_strong) { -- if (is_narrow) { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); -- } else { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); -- } -- } else if (is_weak) { -- if (is_narrow) { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); -- } else { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); -- } -- } else { -- assert(is_phantom, "only remaining strength"); -- assert(!is_narrow, "phantom access cannot be narrow"); -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); -- } -- __ jalr(ra); -- __ mv(t0, x10); -- __ pop_call_clobbered_registers(); -- __ mv(x10, t0); -- __ bind(not_cset); - __ mv(result_dst, x10); - __ pop_reg(saved_regs, sp); - -- __ bind(heap_stable); -+ __ bind(done); - __ 
leave(); - } - -@@ -320,6 +275,15 @@ void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register ds - } - } - -+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) { -+ if (ShenandoahLoadRefBarrier) { -+ Label is_null; -+ __ beqz(dst, is_null); -+ load_reference_barrier_not_null(masm, dst, load_addr); -+ __ bind(is_null); -+ } -+} -+ - // - // Arguments: - // -@@ -363,7 +327,7 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, - - BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); - -- load_reference_barrier(masm, dst, src, decorators); -+ load_reference_barrier(masm, dst, src); - - if (dst != result_dst) { - __ mv(result_dst, dst); -@@ -555,7 +519,7 @@ void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, Shen - Register pre_val_reg = stub->pre_val()->as_register(); - - if (stub->do_load()) { -- ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); - } - __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); - ce->store_parameter(stub->pre_val()->as_register(), 0); -@@ -568,12 +532,6 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble - ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); - __ bind(*stub->entry()); - -- DecoratorSet decorators = stub->decorators(); -- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); -- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); -- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); -- bool is_native = ShenandoahBarrierSet::is_native_access(decorators); -- - Register obj = stub->obj()->as_register(); - Register res = stub->result()->as_register(); - Register addr = stub->addr()->as_pointer_register(); -@@ -587,30 +545,32 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble - __ mv(res, obj); - } - -- if (is_strong) { -- // Check for object in cset. -- __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); -- __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -- __ add(tmp2, tmp2, tmp1); -- __ lbu(tmp2, Address(tmp2)); -- __ beqz(tmp2, *stub->continuation(), true /* is_far */); -- } -+ // Check for null. -+ __ beqz(res, *stub->continuation(), /* is_far */ true); -+ -+ // Check for object in cset. -+ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); -+ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -+ __ add(t0, tmp2, tmp1); -+ __ lb(tmp2, Address(t0)); -+ __ beqz(tmp2, *stub->continuation(), /* is_far */ true); -+ -+ // Check if object is already forwarded. -+ Label slow_path; -+ __ ld(tmp1, Address(res, oopDesc::mark_offset_in_bytes())); -+ __ xori(tmp1, tmp1, -1); -+ __ andi(t0, tmp1, markOopDesc::lock_mask_in_place); -+ __ bnez(t0, slow_path); -+ -+ // Decode forwarded object. 
-+ __ ori(tmp1, tmp1, markOopDesc::marked_value); -+ __ xori(res, tmp1, -1); -+ __ j(*stub->continuation()); - -+ __ bind(slow_path); - ce->store_parameter(res, 0); - ce->store_parameter(addr, 1); -- -- if (is_strong) { -- if (is_native) { -- __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin())); -- } else { -- __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin())); -- } -- } else if (is_weak) { -- __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin())); -- } else { -- assert(is_phantom, "only remaining strength"); -- __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin())); -- } -+ __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin())); - - __ j(*stub->continuation()); - } -@@ -664,8 +624,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss - __ epilogue(); - } - --void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, -- DecoratorSet decorators) { -+void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) { - __ prologue("shenandoah_load_reference_barrier", false); - // arg0 : object to be resolved - -@@ -673,31 +632,10 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s - __ load_parameter(0, x10); - __ load_parameter(1, x11); - -- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); -- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); -- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); -- bool is_native = ShenandoahBarrierSet::is_native_access(decorators); -- if (is_strong) { -- if (is_native) { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); -- } else { -- if (UseCompressedOops) { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); -- } else { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); -- } -- } -- } else if (is_weak) { -- assert(!is_native, "weak must not be called off-heap"); -- if (UseCompressedOops) { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); -- } else { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); -- } -+ if (UseCompressedOops) { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); - } else { -- assert(is_phantom, "only remaining strength"); -- assert(is_native, "phantom must only be called off-heap"); -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_phantom); -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); - } - __ jalr(ra); - __ mv(t0, x10); -@@ -710,3 +648,68 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s - #undef __ - - #endif // COMPILER1 -+ -+address ShenandoahBarrierSetAssembler::shenandoah_lrb() { -+ assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); -+ return _shenandoah_lrb; -+} -+ -+#define __ cgen->assembler()-> -+ -+// Shenandoah load reference barrier. -+// -+// Input: -+// x10: OOP to evacuate. Not null. -+// x11: load address -+// -+// Output: -+// x10: Pointer to evacuated OOP. -+// -+// Trash t0 t1 Preserve everything else. 
-+address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { -+ __ align(6); -+ StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); -+ address start = __ pc(); -+ -+ Label slow_path; -+ __ mv(t1, ShenandoahHeap::in_cset_fast_test_addr()); -+ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -+ __ add(t1, t1, t0); -+ __ lbu(t1, Address(t1, 0)); -+ __ andi(t0, t1, 1); -+ __ bnez(t0, slow_path); -+ __ ret(); -+ -+ __ bind(slow_path); -+ __ enter(); // required for proper stackwalking of RuntimeStub frame -+ -+ __ push_call_clobbered_registers(); -+ -+ if (UseCompressedOops) { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); -+ } else { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); -+ } -+ __ jalr(ra); -+ __ mv(t0, x10); -+ __ pop_call_clobbered_registers(); -+ __ mv(x10, t0); -+ -+ __ leave(); // required for proper stackwalking of RuntimeStub frame -+ __ ret(); -+ -+ return start; -+} -+ -+#undef __ -+ -+void ShenandoahBarrierSetAssembler::barrier_stubs_init() { -+ if (ShenandoahLoadRefBarrier) { -+ int stub_code_size = 2048; -+ ResourceMark rm; -+ BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); -+ CodeBuffer buf(bb); -+ StubCodeGenerator cgen(&buf); -+ _shenandoah_lrb = generate_shenandoah_lrb(&cgen); -+ } -+} -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -index a705f497667..5d75035e9d4 100644 ---- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -@@ -40,6 +40,8 @@ class StubCodeGenerator; - class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { - private: - -+ static address _shenandoah_lrb; -+ - void satb_write_barrier_pre(MacroAssembler* masm, - Register obj, - Register pre_val, -@@ -57,17 +59,22 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { - - void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); - void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); -- void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators); -+ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr); -+ void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr); -+ -+ address generate_shenandoah_lrb(StubCodeGenerator* cgen); - - public: - -+ static address shenandoah_lrb(); -+ - void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); - - #ifdef COMPILER1 - void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); - void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); - void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); -- void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); -+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); - #endif - - virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -@@ -81,8 +88,10 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { - virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, - Register obj, Register tmp, Label& 
slowpath); - -- void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, -+ virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, - Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); -+ -+ virtual void barrier_stubs_init(); - }; - - #endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -index 6c855f23c2a..bab407a8b76 100644 ---- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -@@ -176,48 +176,6 @@ instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldva - ins_pipe(pipe_slow); - %} - --instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -- predicate(needs_acquiring_load_reserved(n)); -- match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); -- ins_cost(10 * DEFAULT_COST); -- -- effect(TEMP_DEF res, TEMP tmp, KILL cr); -- format %{ -- "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq_shenandoah" -- %} -- -- ins_encode %{ -- Register tmp = $tmp$$Register; -- __ mv(tmp, $oldval$$Register); -- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -- Assembler::aq /* acquire */, Assembler::rl /* release */, -- true /* is_cae */, $res$$Register); -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -- predicate(needs_acquiring_load_reserved(n)); -- match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); -- ins_cost(10 * DEFAULT_COST); -- -- effect(TEMP_DEF res, TEMP tmp, KILL cr); -- format %{ -- "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq_shenandoah" -- %} -- -- ins_encode %{ -- Register tmp = $tmp$$Register; -- __ mv(tmp, $oldval$$Register); -- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -- Assembler::aq /* acquire */, Assembler::rl /* release */, -- true /* is_cae */, $res$$Register); -- %} -- -- ins_pipe(pipe_slow); --%} -- - instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ - match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); - ins_cost(10 * DEFAULT_COST); -@@ -237,49 +195,3 @@ instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldva - - ins_pipe(pipe_slow); - %} -- --instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -- predicate(needs_acquiring_load_reserved(n)); -- match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); -- ins_cost(10 * DEFAULT_COST); -- -- effect(TEMP tmp, KILL cr); -- format %{ -- "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapNAcq_shenandoah" -- "mv $res, EQ\t# $res <-- (EQ ? 
1 : 0)" -- %} -- -- ins_encode %{ -- Register tmp = $tmp$$Register; -- __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -- // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop -- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -- Assembler::aq /* acquire */, Assembler::rl /* release */, -- false /* is_cae */, $res$$Register); -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -- predicate(needs_acquiring_load_reserved(n)); -- match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); -- ins_cost(10 * DEFAULT_COST); -- -- effect(TEMP tmp, KILL cr); -- format %{ -- "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapPAcq_shenandoah" -- "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" -- %} -- -- ins_encode %{ -- Register tmp = $tmp$$Register; -- __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -- // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop -- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -- Assembler::aq /* acquire */, Assembler::rl /* release */, -- false /* is_cae */, $res$$Register); -- %} -- -- ins_pipe(pipe_slow); --%} -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index e3f976faa0d..a6061de7a33 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -828,8 +828,10 @@ bool is_CAS(int opcode, bool maybe_volatile) - case Op_CompareAndSwapL: - case Op_CompareAndSwapP: - case Op_CompareAndSwapN: -+#if INCLUDE_SHENANDOAHGC - case Op_ShenandoahCompareAndSwapP: - case Op_ShenandoahCompareAndSwapN: -+#endif - case Op_CompareAndSwapB: - case Op_CompareAndSwapS: - case Op_GetAndSetI: -@@ -851,10 +853,6 @@ bool is_CAS(int opcode, bool maybe_volatile) - case Op_WeakCompareAndSwapL: - case Op_WeakCompareAndSwapP: - case Op_WeakCompareAndSwapN: -- case Op_ShenandoahWeakCompareAndSwapP: -- case Op_ShenandoahWeakCompareAndSwapN: -- case Op_ShenandoahCompareAndExchangeP: -- case Op_ShenandoahCompareAndExchangeN: - return maybe_volatile; - default: - return false; -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index a07dea35b73..5a87c687cf7 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -765,9 +765,18 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { - __ sd(ProfileInterpreter ? 
t0 : zr, Address(sp, 6 * wordSize)); - - // Get mirror and store it in the frame as GC root for this Method* -- __ load_mirror(t2, xmethod); -- __ sd(zr, Address(sp, 5 * wordSize)); -- __ sd(t2, Address(sp, 4 * wordSize)); -+#if INCLUDE_SHENANDOAHGC -+ if (UseShenandoahGC) { -+ __ load_mirror(x28, xmethod); -+ __ sd(zr, Address(sp, 5 * wordSize)); -+ __ sd(x28, Address(sp, 4 * wordSize)); -+ } else -+#endif -+ { -+ __ load_mirror(t2, xmethod); -+ __ sd(zr, Address(sp, 5 * wordSize)); -+ __ sd(t2, Address(sp, 4 * wordSize)); -+ } - - __ ld(xcpool, Address(xmethod, Method::const_offset())); - __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); - -From d8b14fd5e6455b47cfcb02d13c0c24c74e824570 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 14:42:07 +0800 -Subject: [PATCH 061/140] Revert JDK-8248404: AArch64: Remove uses of long and - unsigned long - ---- - src/hotspot/cpu/riscv/assembler_riscv.hpp | 19 +++++++++++++------ - .../cpu/riscv/macroAssembler_riscv.cpp | 6 ------ - .../cpu/riscv/macroAssembler_riscv.hpp | 13 ++++++++----- - 3 files changed, 21 insertions(+), 17 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp -index 51aa052a0c7..31aeeb9b425 100644 ---- a/src/hotspot/cpu/riscv/assembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -183,13 +183,20 @@ class Address { - : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { } - Address(Register r) - : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } -- -- template::value)> -- Address(Register r, T o) -- : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) {} -- -+ Address(Register r, int o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, long o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, long long o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, unsigned int o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, unsigned long o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, unsigned long long o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } - Address(Register r, ByteSize disp) -- : Address(r, in_bytes(disp)) {} -+ : Address(r, in_bytes(disp)) { } - Address(address target, RelocationHolder const& rspec) - : _base(noreg), - _index(noreg), -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 6e4d22db40f..b95f69cfcda 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1364,12 +1364,6 @@ void MacroAssembler::mv(Register Rd, Address dest) { - movptr(Rd, dest.target()); - } - --void MacroAssembler::mv(Register Rd, address addr) { -- // Here in case of use with relocation, use fix length instruciton -- // movptr instead of li -- movptr(Rd, addr); --} -- - void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { - if (src.is_register()) { - mv(Rd, src.as_register()); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index c1ffa120774..76b2716659b 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ 
b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -543,15 +543,18 @@ class MacroAssembler: public Assembler { - } - - // mv -- template::value)> -- inline void mv(Register Rd, T o) { -- li(Rd, (int64_t)o); -- } -+ void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } -+ -+ inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, unsigned int imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, unsigned long imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, unsigned long long imm64) { li(Rd, (int64_t)imm64); } - - inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } - - void mv(Register Rd, Address dest); -- void mv(Register Rd, address addr); - void mv(Register Rd, RegisterOrConstant src); - - // logic - -From 94c1c9c01e61d0cb7c32596ef19b347c32406546 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 16:54:36 +0800 -Subject: [PATCH 062/140] Revert JDK-8280503: Use allStatic.hpp instead of - allocation.hpp where possible - ---- - src/hotspot/cpu/riscv/bytes_riscv.hpp | 2 -- - src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 1 - - 2 files changed, 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp -index 23d982f9abd..f60e0e38ae8 100644 ---- a/src/hotspot/cpu/riscv/bytes_riscv.hpp -+++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp -@@ -27,8 +27,6 @@ - #ifndef CPU_RISCV_BYTES_RISCV_HPP - #define CPU_RISCV_BYTES_RISCV_HPP - --#include "memory/allStatic.hpp" -- - class Bytes: AllStatic { - public: - // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering -diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -index 83ffcc55d83..bc4e5758256 100644 ---- a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -+++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -@@ -27,7 +27,6 @@ - #define CPU_RISCV_JNITYPES_RISCV_HPP - - #include "jni.h" --#include "memory/allStatic.hpp" - #include "oops/oop.hpp" - - // This file holds platform-dependent routines used to write primitive jni - -From 49e6399009b51edafa6904164528e1d051aeae6c Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:07:31 +0800 -Subject: [PATCH 063/140] Revert JDK-8276453: Undefined behavior in C1 - LIR_OprDesc causes SEGV in fastdebug build - ---- - src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 4 ++-- - src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 4 ++-- - src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 1 + - 3 files changed, 5 insertions(+), 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -index af7bd067f33..6057d43296b 100644 ---- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -@@ -58,7 +58,7 @@ RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) - } - - RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) -- : _index(index), _array(), _throw_index_out_of_bounds_exception(true) { -+ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { - assert(info != NULL, "must have info"); - _info = new CodeEmitInfo(info); - } -@@ -83,7 +83,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) { - if (_throw_index_out_of_bounds_exception) { - stub_id = 
Runtime1::throw_index_exception_id; - } else { -- assert(_array != LIR_Opr::nullOpr(), "sanity"); -+ assert(_array != NULL, "sanity"); - __ mv(t1, _array->as_pointer_register()); - stub_id = Runtime1::throw_range_check_failed_id; - } -diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -index 172031941b2..1f8b2b55100 100644 ---- a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -@@ -156,8 +156,8 @@ LIR_Opr FrameMap::long11_opr; - LIR_Opr FrameMap::fpu10_float_opr; - LIR_Opr FrameMap::fpu10_double_opr; - --LIR_Opr FrameMap::_caller_save_cpu_regs[] = {}; --LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; -+LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; -+LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; - - //-------------------------------------------------------- - // FrameMap -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index c45a75b2301..227e7664225 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -206,6 +206,7 @@ LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { - break; - default: - ShouldNotReachHere(); -+ r = NULL; - } - return r; - } - -From b94bda9d1a2c12fa379f8fe813460c498344f543 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:19:19 +0800 -Subject: [PATCH 064/140] Revert JDK-8256205: Simplify compiler calling - convention handling - ---- - src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/riscv.ad | 25 +++++++++++++++++++ - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 11 ++------ - 4 files changed, 29 insertions(+), 11 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -index 6057d43296b..12980c12de6 100644 ---- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -@@ -290,7 +290,7 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) { - const int args_num = 5; - VMRegPair args[args_num]; - BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; -- SharedRuntime::java_calling_convention(signature, args, args_num); -+ SharedRuntime::java_calling_convention(signature, args, args_num, true); - - // push parameters - Register r[args_num]; -diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -index 1f8b2b55100..682ebe82627 100644 ---- a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -@@ -314,7 +314,7 @@ void FrameMap::initialize() { - - VMRegPair regs; - BasicType sig_bt = T_OBJECT; -- SharedRuntime::java_calling_convention(&sig_bt, ®s, 1); -+ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); - receiver_opr = as_oop_opr(regs.first()->as_Register()); - - for (i = 0; i < nof_caller_save_fpu_regs; i++) { -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index a6061de7a33..1667994699f 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2443,6 +2443,12 @@ frame %{ - // Stack alignment requirement - stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) - -+ // Number of stack slots between incoming argument block and the start of -+ // a new frame. 
The PROLOG must add this many slots to the stack. The -+ // EPILOG must remove this many slots. RISC-V needs two slots for -+ // return address and fp. -+ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); -+ - // Number of outgoing stack slots killed above the out_preserve_stack_slots - // for calls to C. Supports the var-args backing area for register parms. - varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); -@@ -2461,6 +2467,25 @@ frame %{ - Compile::current()->fixed_slots()), - stack_alignment_in_slots())); - -+ // Body of function which returns an integer array locating -+ // arguments either in registers or in stack slots. Passed an array -+ // of ideal registers called "sig" and a "length" count. Stack-slot -+ // offsets are based on outgoing arguments, i.e. a CALLER setting up -+ // arguments for a CALLEE. Incoming stack arguments are -+ // automatically biased by the preserve_stack_slots field above. -+ -+ calling_convention -+ %{ -+ // No difference between ingoing/outgoing just pass false -+ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); -+ %} -+ -+ c_calling_convention -+ %{ -+ // This is obviously always outgoing -+ (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length); -+ %} -+ - // Location of compiled Java return values. Same as C for now. - return_value - %{ -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index eaefcc2b595..411bddd2ace 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -233,7 +233,8 @@ static int reg2offset_out(VMReg r) { - - int SharedRuntime::java_calling_convention(const BasicType *sig_bt, - VMRegPair *regs, -- int total_args_passed) { -+ int total_args_passed, -+ int is_outgoing) { - // Create the mapping between argument positions and - // registers. - static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { -@@ -2155,14 +2156,6 @@ void SharedRuntime::generate_deopt_blob() { - _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); - } - --// Number of stack slots between incoming argument block and the start of --// a new frame. The PROLOG must add this many slots to the stack. The --// EPILOG must remove this many slots. --// RISCV needs two words for RA (return address) and FP (frame pointer). 
--uint SharedRuntime::in_preserve_stack_slots() { -- return 2 * VMRegImpl::slots_per_word; --} -- - uint SharedRuntime::out_preserve_stack_slots() { - return 0; - } - -From 3fc948472c4a0918b967646b45c8886103b839d2 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:27:57 +0800 -Subject: [PATCH 065/140] Revert JDK-8183574: Unify the is_power_of_2 functions - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 4 ++-- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 1 - - src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 3 +-- - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1 - - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1 - - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 1 - - src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 1 - - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 1 - - src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp | 1 - - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 - - 10 files changed, 3 insertions(+), 12 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -index 4c1c13dc290..65d0eda62ef 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -@@ -190,7 +190,7 @@ void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr rig - code == lir_add ? __ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c); - break; - case lir_div: -- assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); -+ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); - if (c == 1) { - // move lreg_lo to dreg if divisor is 1 - __ mv(dreg, lreg_lo); -@@ -208,7 +208,7 @@ void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr rig - } - break; - case lir_rem: -- assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); -+ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); - if (c == 1) { - // move 0 to dreg if divisor is 1 - __ mv(dreg, zr); -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 90c4af5d3b0..9de89a3b026 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -41,7 +41,6 @@ - #include "oops/objArrayKlass.hpp" - #include "runtime/frame.inline.hpp" - #include "runtime/sharedRuntime.hpp" --#include "utilities/powerOfTwo.hpp" - #include "vmreg_riscv.inline.hpp" - - #ifndef PRODUCT -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index 227e7664225..a9345158749 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -38,7 +38,6 @@ - #include "ci/ciTypeArrayKlass.hpp" - #include "runtime/sharedRuntime.hpp" - #include "runtime/stubRoutines.hpp" --#include "utilities/powerOfTwo.hpp" - #include "vmreg_riscv.inline.hpp" - - #ifdef ASSERT -@@ -383,7 +382,7 @@ void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { - // no need to do div-by-zero check if the divisor is a non-zero constant - if (c != 0) { need_zero_check = false; } - // do not load right if the divisor is a power-of-2 constant -- if (c > 0 && is_power_of_2(c)) { -+ if (c > 0 && is_power_of_2_long(c)) { - right.dont_load_item(); - } else { - right.load_item(); -diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp 
b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -index 1f45fba9de0..fc88d5c180e 100644 ---- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -46,7 +46,6 @@ - #include "runtime/stubRoutines.hpp" - #include "runtime/vframe.hpp" - #include "runtime/vframeArray.hpp" --#include "utilities/powerOfTwo.hpp" - #include "vmreg_riscv.inline.hpp" - - -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index f0c249f0d26..2fc0b00e2cb 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -44,7 +44,6 @@ - #include "runtime/safepointMechanism.hpp" - #include "runtime/sharedRuntime.hpp" - #include "runtime/thread.inline.hpp" --#include "utilities/powerOfTwo.hpp" - - void InterpreterMacroAssembler::narrow(Register result) { - // Get method->_constMethod->_result_type -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index b95f69cfcda..41a415ef2cf 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -47,7 +47,6 @@ - #include "runtime/sharedRuntime.hpp" - #include "runtime/stubRoutines.hpp" - #include "runtime/thread.hpp" --#include "utilities/powerOfTwo.hpp" - #ifdef COMPILER2 - #include "opto/compile.hpp" - #include "opto/node.hpp" -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 76b2716659b..dd39f67d507 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -30,7 +30,6 @@ - #include "asm/assembler.hpp" - #include "metaprogramming/enableIf.hpp" - #include "oops/compressedOops.hpp" --#include "utilities/powerOfTwo.hpp" - - // MacroAssembler extends Assembler by frequently used macros. 
- // -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -index 8392b768847..0c5b0e001ee 100644 ---- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -45,7 +45,6 @@ - #include "runtime/stubRoutines.hpp" - #include "runtime/thread.inline.hpp" - #include "utilities/align.hpp" --#include "utilities/powerOfTwo.hpp" - #ifdef COMPILER2 - #include "opto/runtime.hpp" - #endif -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index 5a87c687cf7..a10677bf650 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -51,7 +51,6 @@ - #include "runtime/timer.hpp" - #include "runtime/vframeArray.hpp" - #include "utilities/debug.hpp" --#include "utilities/powerOfTwo.hpp" - #include - - #ifndef PRODUCT -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index 158294f7436..2a92fb9dd49 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -44,7 +44,6 @@ - #include "runtime/sharedRuntime.hpp" - #include "runtime/stubRoutines.hpp" - #include "runtime/synchronizer.hpp" --#include "utilities/powerOfTwo.hpp" - - #define __ _masm-> - - -From 31b18aa6a29b83e2cae7ea76c5d4759b2596eca0 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:34:39 +0800 -Subject: [PATCH 066/140] Revert JDK-8276976: Rename LIR_OprDesc to LIR_Opr - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/c1_LIR_riscv.cpp | 14 +++++++------- - 2 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 9de89a3b026..70ee6295bfb 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -1261,7 +1261,7 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - assert(op->addr()->is_address(), "what else?"); - LIR_Address* addr_ptr = op->addr()->as_address_ptr(); - assert(addr_ptr->disp() == 0, "need 0 disp"); -- assert(addr_ptr->index() == LIR_Opr::illegalOpr(), "need 0 index"); -+ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); - addr = as_reg(addr_ptr->base()); - } - Register newval = as_reg(op->new_value()); -diff --git a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp -index 5f1c394ab3d..0317ed9003e 100644 ---- a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp -@@ -27,22 +27,22 @@ - #include "asm/register.hpp" - #include "c1/c1_LIR.hpp" - --FloatRegister LIR_Opr::as_float_reg() const { -+FloatRegister LIR_OprDesc::as_float_reg() const { - return as_FloatRegister(fpu_regnr()); - } - --FloatRegister LIR_Opr::as_double_reg() const { -+FloatRegister LIR_OprDesc::as_double_reg() const { - return as_FloatRegister(fpu_regnrLo()); - } - - // Reg2 unused. 
- LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { - assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); -- return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | -- (reg1 << LIR_Opr::reg2_shift) | -- LIR_Opr::double_type | -- LIR_Opr::fpu_register | -- LIR_Opr::double_size); -+ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | -+ (reg1 << LIR_OprDesc::reg2_shift) | -+ LIR_OprDesc::double_type | -+ LIR_OprDesc::fpu_register | -+ LIR_OprDesc::double_size); - } - - #ifndef PRODUCT - -From 2e64fa47eddc271d32b136ace4f062cfb9648b25 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:39:16 +0800 -Subject: [PATCH 067/140] Revert JDK-8269672: C1: Remove unaligned move on all - architectures - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 8 +++++--- - .../cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 2 +- - 2 files changed, 6 insertions(+), 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 70ee6295bfb..e29c0df5f8b 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -673,7 +673,7 @@ void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool po - } - } - --void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide) { -+void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { - LIR_Address* to_addr = dest->as_address_ptr(); - // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src - Register compressed_src = t1; -@@ -795,7 +795,7 @@ void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { - reg2stack(temp, dest, dest->type(), false); - } - --void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide) { -+void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { - assert(src->is_address(), "should not call otherwise"); - assert(dest->is_register(), "should not call otherwise"); - -@@ -910,11 +910,13 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L - Label done; - move_op(opr2, result, type, lir_patch_none, NULL, - false, // pop_fpu_stack -+ false, // unaligned - false); // wide - __ j(done); - __ bind(label); - move_op(opr1, result, type, lir_patch_none, NULL, - false, // pop_fpu_stack -+ false, // unaligned - false); // wide - __ bind(done); - } -@@ -1866,7 +1868,7 @@ void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* arg - - void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { - if (dest->is_address() || src->is_address()) { -- move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /* wide */ false); -+ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /*unaligned*/ false, /* wide */ false); - } else { - ShouldNotReachHere(); - } -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -index d724876ec3a..bc847388f68 100644 ---- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -+++ 
b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -340,7 +340,7 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier - Register pre_val_reg = stub->pre_val()->as_register(); - - if (stub->do_load()) { -- ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); - } - __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); - ce->store_parameter(stub->pre_val()->as_register(), 0); - -From 5f15abe61c700cbf59805530c52e8e558354d552 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:54:05 +0800 -Subject: [PATCH 068/140] Revert JDK-8264805: Remove the experimental - Ahead-of-Time Compiler - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp | 1 + - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 4 ++-- - 2 files changed, 3 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp -index 051328c3a8a..5c81f1c704c 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp -@@ -73,6 +73,7 @@ friend class ArrayCopyStub; - // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) - _call_stub_size = 14 * NativeInstruction::instruction_size + - (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), -+ _call_aot_stub_size = 0, - // See emit_exception_handler for detail - // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) - _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller -diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -index 1cfc92b28fa..a29e5be9dbb 100644 ---- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -86,7 +86,7 @@ int CompiledStaticCall::reloc_to_interp_stub() { - } - - void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { -- address stub = find_stub(); -+ address stub = find_stub(false /* is_aot */); - guarantee(stub != NULL, "stub not found"); - - if (TraceICs) { -@@ -138,7 +138,7 @@ void CompiledDirectStaticCall::verify() { - _call->verify_alignment(); - - // Verify stub. -- address stub = find_stub(); -+ address stub = find_stub(false /* is_aot */); - assert(stub != NULL, "no stub found for static call"); - // Creation also verifies the object. 
- NativeMovConstReg* method_holder - -From 4cfd20c7d163188a1a4e63ffaa19708e15be9d96 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:59:45 +0800 -Subject: [PATCH 069/140] Revert JDK-8277417: C1 LIR instruction for load-klass - ---- - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 30 ++++++++----------- - 1 file changed, 12 insertions(+), 18 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index e29c0df5f8b..49653d04d81 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -840,7 +840,14 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch - __ ld(dest->as_register(), as_Address(from_addr)); - break; - case T_ADDRESS: -- __ ld(dest->as_register(), as_Address(from_addr)); -+ // FIXME: OMG this is a horrible kludge. Any offset from an -+ // address that matches klass_offset_in_bytes() will be loaded -+ // as a word, not a long. -+ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { -+ __ lwu(dest->as_register(), as_Address(from_addr)); -+ } else { -+ __ ld(dest->as_register(), as_Address(from_addr)); -+ } - break; - case T_INT: - __ lw(dest->as_register(), as_Address(from_addr)); -@@ -869,6 +876,10 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch - __ decode_heap_oop(dest->as_register()); - } - __ verify_oop(dest->as_register()); -+ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { -+ if (UseCompressedClassPointers) { -+ __ decode_klass_not_null(dest->as_register()); -+ } - } - } - -@@ -1531,23 +1542,6 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { - __ bind(*op->stub()->continuation()); - } - --void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { -- Register obj = op->obj()->as_pointer_register(); -- Register result = op->result_opr()->as_pointer_register(); -- -- CodeEmitInfo* info = op->info(); -- if (info != NULL) { -- add_debug_info_for_null_check_here(info); -- } -- -- if (UseCompressedClassPointers) { -- __ lwu(result, Address(obj, oopDesc::klass_offset_in_bytes())); -- __ decode_klass_not_null(result); -- } else { -- __ ld(result, Address(obj, oopDesc::klass_offset_in_bytes())); -- } --} -- - void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { - ciMethod* method = op->profiled_method(); - int bci = op->profiled_bci(); - -From eb4de6fc8f9b6192d16343382ebbe4035ce71702 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:09:31 +0800 -Subject: [PATCH 070/140] Revert JDK-8245957: Remove unused LIR_OpBranch::type - after SPARC port removal - ---- - src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index a9345158749..2aba4f4974f 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -393,7 +393,7 @@ void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { - if (need_zero_check) { - CodeEmitInfo* info = state_for(x); - __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); -- __ branch(lir_cond_equal, new DivByZeroStub(info)); -+ __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info)); - } - - rlock_result(x); -@@ -467,7 +467,7 @@ void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { - if 
(need_zero_check) { - CodeEmitInfo* info = state_for(x); - __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); -- __ branch(lir_cond_equal, new DivByZeroStub(info)); -+ __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info)); - } - - LIR_Opr ill = LIR_OprFact::illegalOpr; -@@ -1055,9 +1055,9 @@ void LIRGenerator::do_If(If* x) { - profile_branch(x, cond); - move_to_phi(x->state()); - if (x->x()->type()->is_float_kind()) { -- __ branch(lir_cond(cond), x->tsux(), x->usux()); -+ __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); - } else { -- __ branch(lir_cond(cond), x->tsux()); -+ __ branch(lir_cond(cond), right->type(), x->tsux()); - } - assert(x->default_sux() == x->fsux(), "wrong destination above"); - __ jump(x->default_sux()); - -From d34f25c618982d3ac79e6ab2a47b3a199434d01b Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:14:10 +0800 -Subject: [PATCH 071/140] Revert JDK-8266950: Remove vestigial support for - non-strict floating-point execution - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 4 ++++ - src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 7 ++++++- - 2 files changed, 10 insertions(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -index 65d0eda62ef..2a99d49c94b 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -@@ -238,7 +238,9 @@ void LIR_Assembler::arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr rig - switch (code) { - case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; - case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; -+ case lir_mul_strictfp: // fall through - case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; -+ case lir_div_strictfp: // fall through - case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; - default: - ShouldNotReachHere(); -@@ -251,7 +253,9 @@ void LIR_Assembler::arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr rig - switch (code) { - case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; - case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; -+ case lir_mul_strictfp: // fall through - case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; -+ case lir_div_strictfp: // fall through - case lir_div: __ fdiv_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; - default: - ShouldNotReachHere(); -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index 2aba4f4974f..21ae066e9ab 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -360,7 +360,12 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { - right.load_item(); - - LIR_Opr reg = rlock(x); -- arithmetic_op_fpu(x->op(), reg, left.result(), right.result()); -+ LIR_Opr tmp = LIR_OprFact::illegalOpr; -+ if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) { -+ tmp = new_register(T_DOUBLE); -+ } -+ -+ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); - - set_result(x, round_item(reg)); - } - -From 
02c0a84d52417d4aeddbdd10c07df446ee45c5de Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:19:51 +0800 -Subject: [PATCH 072/140] Revert JDK-8276217: Harmonize StrictMath intrinsics - handling - ---- - src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 6 ++---- - 1 file changed, 2 insertions(+), 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index 21ae066e9ab..f9242251491 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -651,16 +651,14 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { - do_LibmIntrinsic(x); - break; - case vmIntrinsics::_dabs: // fall through -- case vmIntrinsics::_dsqrt: // fall through -- case vmIntrinsics::_dsqrt_strict: { -+ case vmIntrinsics::_dsqrt: { - assert(x->number_of_arguments() == 1, "wrong type"); - LIRItem value(x->argument_at(0), this); - value.load_item(); - LIR_Opr dst = rlock_result(x); - - switch (x->id()) { -- case vmIntrinsics::_dsqrt: // fall through -- case vmIntrinsics::_dsqrt_strict: { -+ case vmIntrinsics::_dsqrt: { - __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); - break; - } - -From 8dbace163d42cbb41ff49463b34f8971437fe82f Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:35:08 +0800 -Subject: [PATCH 073/140] Revert JDK-8276209: Some call sites doesn't pass the - parameter 'size' to SharedRuntime::dtrace_object_alloc(_base) - ---- - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -index fc88d5c180e..329df2e1ca7 100644 ---- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -1186,7 +1186,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); - save_live_registers(sasm); - -- __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), c_rarg0); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), c_rarg0); - - restore_live_registers(sasm); - } -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index 2a92fb9dd49..ddc9498dddc 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -3577,7 +3577,7 @@ void TemplateTable::_new() { - SkipIfEqual skip(_masm, &DTraceAllocProbes, false); - // Trigger dtrace event for fastpath - __ push(atos); // save the return value -- __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), x10); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), x10); - __ pop(atos); // restore the return value - } - __ j(done); - -From 8930b6049a5b6e31ec9409c167b0e58d24cf6821 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:38:51 +0800 -Subject: [PATCH 074/140] Revert JDK-8229838: Rename markOop files to markWord - ---- - src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp | 1 - - src/hotspot/cpu/riscv/frame_riscv.cpp | 1 - - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1 - - 3 files changed, 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp 
b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index e486f41948e..44ceccd8bd1 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -33,7 +33,6 @@ - #include "gc/shared/collectedHeap.hpp" - #include "interpreter/interpreter.hpp" - #include "oops/arrayOop.hpp" --#include "oops/markWord.hpp" - #include "runtime/basicLock.hpp" - #include "runtime/biasedLocking.hpp" - #include "runtime/os.hpp" -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 13c482b610a..050595389e9 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -29,7 +29,6 @@ - #include "interpreter/interpreter.hpp" - #include "memory/resourceArea.hpp" - #include "memory/universe.hpp" --#include "oops/markWord.hpp" - #include "oops/method.hpp" - #include "oops/oop.inline.hpp" - #include "prims/methodHandles.hpp" -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 2fc0b00e2cb..006fe49b155 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -33,7 +33,6 @@ - #include "interpreter/interpreterRuntime.hpp" - #include "logging/log.hpp" - #include "oops/arrayOop.hpp" --#include "oops/markWord.hpp" - #include "oops/method.hpp" - #include "oops/methodData.hpp" - #include "prims/jvmtiExport.hpp" - -From f11c5a2beca94c8248c30899fef90947d478e10c Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:42:33 +0800 -Subject: [PATCH 075/140] Revert JDK-8235673: [C1, C2] Split inlining control - flags - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index fe46f7b21c8..fd25f8f9afd 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -42,6 +42,7 @@ define_pd_global(bool, TieredCompilation, false); - define_pd_global(intx, CompileThreshold, 1500 ); - - define_pd_global(intx, OnStackReplacePercentage, 933 ); -+define_pd_global(intx, FreqInlineSize, 325 ); - define_pd_global(intx, NewSizeThreadIncrease, 4*K ); - define_pd_global(intx, InitialCodeCacheSize, 160*K); - define_pd_global(intx, ReservedCodeCacheSize, 32*M ); - -From 6908dc58f2c66ca6a5adf4444a7ec2a91a80b9c8 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:45:00 +0800 -Subject: [PATCH 076/140] Revert JDK-8262074: Consolidate the default value of - MetaspaceSize - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 3 +++ - 2 files changed, 4 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index fd25f8f9afd..1c55a23eecf 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -53,6 +53,7 @@ define_pd_global(bool, ProfileInterpreter, false); - define_pd_global(intx, CodeCacheExpansionSize, 32*K ); - define_pd_global(uintx, CodeCacheMinBlockLength, 1); - define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); -+define_pd_global(uintx, MetaspaceSize, 12*M ); - define_pd_global(bool, NeverActAsServerClassMachine, true ); - define_pd_global(uint64_t, MaxRAM, 1ULL*G); - define_pd_global(bool, CICompileOSR, true ); -diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp 
b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -index 53a41665f4b..d9e5fcc1bb0 100644 ---- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -75,6 +75,9 @@ define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); - define_pd_global(uintx, CodeCacheMinBlockLength, 6); - define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); - -+// Heap related flags -+define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); -+ - // Ergonomics related flags - define_pd_global(bool, NeverActAsServerClassMachine, false); - - -From a3e991b37781d90c822471b54ace915622bee0da Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:48:15 +0800 -Subject: [PATCH 077/140] Revert JDK-8246023: Obsolete LIRFillDelaySlot - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index 1c55a23eecf..bd8d039de03 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -60,6 +60,7 @@ define_pd_global(bool, CICompileOSR, true ); - #endif // !COMPILER2 - define_pd_global(bool, UseTypeProfile, false); - -+define_pd_global(bool, LIRFillDelaySlots, false); - define_pd_global(bool, OptimizeSinglePrecision, true ); - define_pd_global(bool, CSEArrayLength, false); - define_pd_global(bool, TwoOperandLIRForm, false); - -From 9f6082ae9810e6a26c6803cb37cce62297d15a74 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:50:27 +0800 -Subject: [PATCH 078/140] Revert JDK-8136414: Large performance penalty - declaring a method strictfp on strict-only platforms - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index bd8d039de03..16a87b7aced 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -59,6 +59,7 @@ define_pd_global(uint64_t, MaxRAM, 1ULL*G); - define_pd_global(bool, CICompileOSR, true ); - #endif // !COMPILER2 - define_pd_global(bool, UseTypeProfile, false); -+define_pd_global(bool, RoundFPResults, true ); - - define_pd_global(bool, LIRFillDelaySlots, false); - define_pd_global(bool, OptimizeSinglePrecision, true ); - -From fbf03fc61be068f7f7c8ca1ab3854cc05519c5a3 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:58:36 +0800 -Subject: [PATCH 079/140] Revert JDK-8251462: Simplify compilation policy - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 4 +- - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 +- - src/hotspot/cpu/riscv/globals_riscv.hpp | 2 +- - .../templateInterpreterGenerator_riscv.cpp | 114 +++++++++--- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 176 ++++++++++++------ - 5 files changed, 210 insertions(+), 88 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index 16a87b7aced..8f2f4e0e81d 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -32,7 +32,7 @@ - // Sets the default values for platform dependent flags used by the client compiler. 
- // (see c1_globals.hpp) - --#ifndef COMPILER2 -+#ifndef TIERED - define_pd_global(bool, BackgroundCompilation, true ); - define_pd_global(bool, InlineIntrinsics, true ); - define_pd_global(bool, PreferInterpreterNativeStubs, false); -@@ -57,7 +57,7 @@ define_pd_global(uintx, MetaspaceSize, 12*M ); - define_pd_global(bool, NeverActAsServerClassMachine, true ); - define_pd_global(uint64_t, MaxRAM, 1ULL*G); - define_pd_global(bool, CICompileOSR, true ); --#endif // !COMPILER2 -+#endif // !TIERED - define_pd_global(bool, UseTypeProfile, false); - define_pd_global(bool, RoundFPResults, true ); - -diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -index d9e5fcc1bb0..6c301cdae04 100644 ---- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -39,7 +39,7 @@ define_pd_global(bool, PreferInterpreterNativeStubs, false); - define_pd_global(bool, ProfileTraps, true); - define_pd_global(bool, UseOnStackReplacement, true); - define_pd_global(bool, ProfileInterpreter, true); --define_pd_global(bool, TieredCompilation, COMPILER1_PRESENT(true) NOT_COMPILER1(false)); -+define_pd_global(bool, TieredCompilation, trueInTiered); - define_pd_global(intx, CompileThreshold, 10000); - - define_pd_global(intx, OnStackReplacePercentage, 140); -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index 50bbb6a77b8..b78f258a764 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -36,7 +36,7 @@ define_pd_global(bool, ImplicitNullChecks, true); // Generate code for im - define_pd_global(bool, TrapBasedNullChecks, false); - define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast - --define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. -+define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. - define_pd_global(intx, CodeEntryAlignment, 64); - define_pd_global(intx, OptoLoopAlignment, 16); - -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index a10677bf650..8aea4eca048 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -556,31 +556,81 @@ address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, - // - // xmethod: method - // --void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) { -+void TemplateInterpreterGenerator::generate_counter_incr( -+ Label* overflow, -+ Label* profile_method, -+ Label* profile_method_continue) { - Label done; - // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. -- int increment = InvocationCounter::count_increment; -- Label no_mdo; -- if (ProfileInterpreter) { -- // Are we profiling? 
-- __ ld(x10, Address(xmethod, Method::method_data_offset())); -- __ beqz(x10, no_mdo); -- // Increment counter in the MDO -- const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + -- in_bytes(InvocationCounter::counter_offset())); -- const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); -- __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); -- __ j(done); -+ if (TieredCompilation) { -+ int increment = InvocationCounter::count_increment; -+ Label no_mdo; -+ if (ProfileInterpreter) { -+ // Are we profiling? -+ __ ld(x10, Address(xmethod, Method::method_data_offset())); -+ __ beqz(x10, no_mdo); -+ // Increment counter in the MDO -+ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); -+ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); -+ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); -+ __ j(done); -+ } -+ __ bind(no_mdo); -+ // Increment counter in MethodCounters -+ const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); -+ __ get_method_counters(xmethod, t1, done); -+ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); -+ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); -+ __ bind(done); -+ } else { // not TieredCompilation -+ const Address backedge_counter(t1, -+ MethodCounters::backedge_counter_offset() + -+ InvocationCounter::counter_offset()); -+ const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); -+ -+ __ get_method_counters(xmethod, t1, done); -+ -+ if (ProfileInterpreter) { // %%% Merge this into MethodData* -+ __ lwu(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); -+ __ addw(x11, x11, 1); -+ __ sw(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); -+ } -+ // Update standard invocation counters -+ __ lwu(x11, invocation_counter); -+ __ lwu(x10, backedge_counter); -+ -+ __ addw(x11, x11, InvocationCounter::count_increment); -+ __ andi(x10, x10, InvocationCounter::count_mask_value); -+ -+ __ sw(x11, invocation_counter); -+ __ addw(x10, x10, x11); // add both counters -+ -+ // profile_method is non-null only for interpreted method so -+ // profile_method != NULL == !native_call -+ -+ if (ProfileInterpreter && profile_method != NULL) { -+ // Test to see if we should create a method data oop -+ __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); -+ __ blt(x10, t1, *profile_method_continue); -+ -+ // if no method data exists, go to profile_method -+ __ test_method_data_pointer(t1, *profile_method); -+ } -+ -+ { -+ __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); -+ __ bltu(x10, t1, done); -+ __ j(*overflow); -+ } -+ __ bind(done); - } -- __ bind(no_mdo); -- // Increment counter in MethodCounters -- const Address invocation_counter(t1, -- MethodCounters::invocation_counter_offset() + -- InvocationCounter::counter_offset()); -- __ get_method_counters(xmethod, t1, done); -- const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); -- __ 
increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); -- __ bind(done); - } - - void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { -@@ -977,7 +1027,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { - // increment invocation count & check for overflow - Label invocation_counter_overflow; - if (inc_counter) { -- generate_counter_incr(&invocation_counter_overflow); -+ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); - } - - Label continue_after_compile; -@@ -1389,8 +1439,15 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { - - // increment invocation count & check for overflow - Label invocation_counter_overflow; -+ Label profile_method; -+ Label profile_method_continue; - if (inc_counter) { -- generate_counter_incr(&invocation_counter_overflow); -+ generate_counter_incr(&invocation_counter_overflow, -+ &profile_method, -+ &profile_method_continue); -+ if (ProfileInterpreter) { -+ __ bind(profile_method_continue); -+ } - } - - Label continue_after_compile; -@@ -1427,6 +1484,15 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { - - // invocation counter overflow - if (inc_counter) { -+ if (ProfileInterpreter) { -+ // We have decided to profile this method in the interpreter -+ __ bind(profile_method); -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); -+ __ set_method_data_pointer_for_bcp(); -+ // don't think we need this -+ __ get_method(x11); -+ __ j(profile_method_continue); -+ } - // Handle overflow of counter and compile method - __ bind(invocation_counter_overflow); - generate_counter_overflow(continue_after_compile); -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index ddc9498dddc..bb20f228447 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -1745,6 +1745,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) - assert(UseLoopCounter || !UseOnStackReplacement, - "on-stack-replacement requires loop counters"); - Label backedge_counter_overflow; -+ Label profile_method; - Label dispatch; - if (UseLoopCounter) { - // increment backedge counter for backward branches -@@ -1769,31 +1770,75 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) - __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory - __ bind(has_counters); - -- Label no_mdo; -- int increment = InvocationCounter::count_increment; -- if (ProfileInterpreter) { -- // Are we profiling? -- __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); -- __ beqz(x11, no_mdo); -- // Increment the MDO backedge counter -- const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + -- in_bytes(InvocationCounter::counter_offset())); -- const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); -- __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, -- x10, t0, false, -+ if (TieredCompilation) { -+ Label no_mdo; -+ int increment = InvocationCounter::count_increment; -+ if (ProfileInterpreter) { -+ // Are we profiling? 
-+ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ __ beqz(x11, no_mdo); -+ // Increment the MDO backedge counter -+ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); -+ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); -+ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, -+ x10, t0, false, -+ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); -+ __ j(dispatch); -+ } -+ __ bind(no_mdo); -+ // Increment backedge counter in MethodCounters* -+ __ ld(t0, Address(xmethod, Method::method_counters_offset())); -+ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); -+ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, -+ x10, t1, false, - UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); -- __ j(dispatch); -+ } else { // not TieredCompilation -+ // increment counter -+ __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(x10, Address(t1, be_offset)); // load backedge counter -+ __ addw(t0, x10, InvocationCounter::count_increment); // increment counter -+ __ sw(t0, Address(t1, be_offset)); // store counter -+ -+ __ lwu(x10, Address(t1, inv_offset)); // load invocation counter -+ __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits -+ __ addw(x10, x10, t0); // add both counters -+ -+ if (ProfileInterpreter) { -+ // Test to see if we should create a method data oop -+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); -+ __ blt(x10, t0, dispatch); -+ -+ // if no method data exists, go to profile method -+ __ test_method_data_pointer(x10, profile_method); -+ -+ if (UseOnStackReplacement) { -+ // check for overflow against x11 which is the MDO taken count -+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); -+ __ bltu(x11, t0, dispatch); // Intel == Assembler::below, lo:unsigned lower -+ -+ // When ProfileInterpreter is on, the backedge_count comes -+ // from the MethodData*, which value does not get reset on -+ // the call to frequency_counter_overflow(). To avoid -+ // excessive calls to the overflow routine while the method is -+ // being compiled, add a second test to make sure the overflow -+ // function is called only once every overflow_frequency. -+ const int overflow_frequency = 1024; -+ __ andi(x11, x11, overflow_frequency - 1); -+ __ beqz(x11, backedge_counter_overflow); -+ -+ } -+ } else { -+ if (UseOnStackReplacement) { -+ // check for overflow against x10, which is the sum of the -+ // counters -+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); -+ __ bgeu(x10, t0, backedge_counter_overflow); // Intel == Assembler::aboveEqual -+ } -+ } - } -- __ bind(no_mdo); -- // Increment backedge counter in MethodCounters* -- __ ld(t0, Address(xmethod, Method::method_counters_offset())); -- const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); -- __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, -- x10, t1, false, -- UseOnStackReplacement ? 
&backedge_counter_overflow : &dispatch); - __ bind(dispatch); - } -- - // Pre-load the next target bytecode into t0 - __ load_unsigned_byte(t0, Address(xbcp, 0)); - -@@ -1802,52 +1847,63 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) - // xbcp: target bcp - __ dispatch_only(vtos, /*generate_poll*/true); - -- if (UseLoopCounter && UseOnStackReplacement) { -- // invocation counter overflow -- __ bind(backedge_counter_overflow); -- __ neg(x12, x12); -- __ add(x12, x12, xbcp); // branch xbcp -- // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) -- __ call_VM(noreg, -- CAST_FROM_FN_PTR(address, -- InterpreterRuntime::frequency_counter_overflow), -- x12); -- __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode -- -- // x10: osr nmethod (osr ok) or NULL (osr not possible) -- // w11: target bytecode -- // x12: temporary -- __ beqz(x10, dispatch); // test result -- no osr if null -- // nmethod may have been invalidated (VM may block upon call_VM return) -- __ lbu(x12, Address(x10, nmethod::state_offset())); -- if (nmethod::in_use != 0) { -- __ sub(x12, x12, nmethod::in_use); -+ if (UseLoopCounter) { -+ if (ProfileInterpreter && !TieredCompilation) { -+ // Out-of-line code to allocate method data oop. -+ __ bind(profile_method); -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); -+ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode -+ __ set_method_data_pointer_for_bcp(); -+ __ j(dispatch); - } -- __ bnez(x12, dispatch); - -- // We have the address of an on stack replacement routine in x10 -- // We need to prepare to execute the OSR method. First we must -- // migrate the locals and monitors off of the stack. -+ if (UseOnStackReplacement) { -+ // invocation counter overflow -+ __ bind(backedge_counter_overflow); -+ __ neg(x12, x12); -+ __ add(x12, x12, xbcp); // branch xbcp -+ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::frequency_counter_overflow), -+ x12); -+ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode -+ -+ // x10: osr nmethod (osr ok) or NULL (osr not possible) -+ // w11: target bytecode -+ // x12: temporary -+ __ beqz(x10, dispatch); // test result -- no osr if null -+ // nmethod may have been invalidated (VM may block upon call_VM return) -+ __ lbu(x12, Address(x10, nmethod::state_offset())); -+ if (nmethod::in_use != 0) { -+ __ sub(x12, x12, nmethod::in_use); -+ } -+ __ bnez(x12, dispatch); -+ -+ // We have the address of an on stack replacement routine in x10 -+ // We need to prepare to execute the OSR method. First we must -+ // migrate the locals and monitors off of the stack. 
- -- __ mv(x9, x10); // save the nmethod -+ __ mv(x9, x10); // save the nmethod - -- call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); -+ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); - -- // x10 is OSR buffer, move it to expected parameter location -- __ mv(j_rarg0, x10); -+ // x10 is OSR buffer, move it to expected parameter location -+ __ mv(j_rarg0, x10); - -- // remove activation -- // get sender esp -- __ ld(esp, -- Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); -- // remove frame anchor -- __ leave(); -- // Ensure compiled code always sees stack at proper alignment -- __ andi(sp, esp, -16); -+ // remove activation -+ // get sender esp -+ __ ld(esp, -+ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); -+ // remove frame anchor -+ __ leave(); -+ // Ensure compiled code always sees stack at proper alignment -+ __ andi(sp, esp, -16); - -- // and begin the OSR nmethod -- __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); -- __ jr(t0); -+ // and begin the OSR nmethod -+ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); -+ __ jr(t0); -+ } - } - } - - -From b1f3fd0510681324d70028443a3532d6084be504 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 27 Apr 2023 11:37:05 +0800 -Subject: [PATCH 080/140] Revert JDK-8250902: Implement MD5 Intrinsics on x86 - ---- - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 5 ---- - ...nericTestCaseForUnsupportedRISCV64CPU.java | 30 +++++++++---------- - 2 files changed, 15 insertions(+), 20 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -index c0491d23fa6..d4b79162d84 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -97,11 +97,6 @@ void VM_Version::initialize() { - FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); - } - -- if (UseMD5Intrinsics) { -- warning("MD5 intrinsics are not available on this CPU."); -- FLAG_SET_DEFAULT(UseMD5Intrinsics, false); -- } -- - if (UseRVV) { - if (!(_features & CPU_V)) { - warning("RVV is not supported on this CPU"); -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -index 2ecfec07a4c..8566d57c391 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -@@ -24,7 +24,7 @@ - - package compiler.intrinsics.sha.cli.testcases; - --import compiler.intrinsics.sha.cli.DigestOptionsBase; -+import compiler.intrinsics.sha.cli.SHAOptionsBase; - import jdk.test.lib.process.ExitCode; - import jdk.test.lib.Platform; - import jdk.test.lib.cli.CommandLineOptionTest; -@@ -36,7 +36,7 @@ - * which don't support instruction required by the tested option. 
- */ - public class GenericTestCaseForUnsupportedRISCV64CPU extends -- DigestOptionsBase.TestCase { -+ SHAOptionsBase.TestCase { - - final private boolean checkUseSHA; - -@@ -46,7 +46,7 @@ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { - - public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { - super(optionName, new AndPredicate(Platform::isRISCV64, -- new NotPredicate(DigestOptionsBase.getPredicateForOption( -+ new NotPredicate(SHAOptionsBase.getPredicateForOption( - optionName)))); - - this.checkUseSHA = checkUseSHA; -@@ -58,27 +58,27 @@ protected void verifyWarnings() throws Throwable { - + "option '-XX:-%s' without any warnings", optionName); - //Verify that option could be disabled without any warnings. - CommandLineOptionTest.verifySameJVMStartup(null, new String[] { -- DigestOptionsBase.getWarningForUnsupportedCPU(optionName) -+ SHAOptionsBase.getWarningForUnsupportedCPU(optionName) - }, shouldPassMessage, shouldPassMessage, ExitCode.OK, -- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, - CommandLineOptionTest.prepareBooleanFlag(optionName, false)); - - if (checkUseSHA) { - shouldPassMessage = String.format("If JVM is started with '-XX:-" - + "%s' '-XX:+%s', output should contain warning.", -- DigestOptionsBase.USE_SHA_OPTION, optionName); -+ SHAOptionsBase.USE_SHA_OPTION, optionName); - - // Verify that when the tested option is enabled, then - // a warning will occur in VM output if UseSHA is disabled. -- if (!optionName.equals(DigestOptionsBase.USE_SHA_OPTION)) { -+ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { - CommandLineOptionTest.verifySameJVMStartup( -- new String[] { DigestOptionsBase.getWarningForUnsupportedCPU(optionName) }, -+ new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, - null, - shouldPassMessage, - shouldPassMessage, - ExitCode.OK, -- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -- CommandLineOptionTest.prepareBooleanFlag(DigestOptionsBase.USE_SHA_OPTION, false), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), - CommandLineOptionTest.prepareBooleanFlag(optionName, true)); - } - } -@@ -90,7 +90,7 @@ protected void verifyOptionValues() throws Throwable { - CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", - String.format("Option '%s' should be disabled by default", - optionName), -- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); - - if (checkUseSHA) { - // Verify that option is disabled even if it was explicitly enabled -@@ -98,7 +98,7 @@ protected void verifyOptionValues() throws Throwable { - CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", - String.format("Option '%s' should be off on unsupported " - + "RISCV64CPU even if set to true directly", optionName), -- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, - CommandLineOptionTest.prepareBooleanFlag(optionName, true)); - - // Verify that option is disabled when +UseSHA was passed to JVM. 
-@@ -106,10 +106,10 @@ protected void verifyOptionValues() throws Throwable { - String.format("Option '%s' should be off on unsupported " - + "RISCV64CPU even if %s flag set to JVM", - optionName, CommandLineOptionTest.prepareBooleanFlag( -- DigestOptionsBase.USE_SHA_OPTION, true)), -- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ SHAOptionsBase.USE_SHA_OPTION, true)), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, - CommandLineOptionTest.prepareBooleanFlag( -- DigestOptionsBase.USE_SHA_OPTION, true)); -+ SHAOptionsBase.USE_SHA_OPTION, true)); - } - } - } - -From b5e96cb7663b2def3a064b9aede7209fb0c5eeda Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 27 Apr 2023 15:41:48 +0800 -Subject: [PATCH 081/140] Revert JDK-8253555: Make ByteSize and WordSize typed - scoped enums - ---- - src/hotspot/cpu/riscv/assembler_riscv.hpp | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp -index 31aeeb9b425..9959ac1d02c 100644 ---- a/src/hotspot/cpu/riscv/assembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -195,8 +195,10 @@ class Address { - : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } - Address(Register r, unsigned long long o) - : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+#ifdef ASSERT - Address(Register r, ByteSize disp) -- : Address(r, in_bytes(disp)) { } -+ : _base(r), _index(noreg), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(0) { } -+#endif - Address(address target, RelocationHolder const& rspec) - : _base(noreg), - _index(noreg), - -From 592afab705a4d4c8b2773a0808e47efc2a14517d Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 15:18:12 +0800 -Subject: [PATCH 082/140] Revert JDK-8253457: Remove unimplemented register - stack functions - ---- - .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -index 61e2cf85b63..313a7b932c3 100644 ---- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -@@ -34,15 +34,31 @@ - frame pd_last_frame(); - - public: -+ -+ void set_base_of_stack_pointer(intptr_t* base_sp) { -+ } -+ - static ByteSize last_Java_fp_offset() { - return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); - } - -+ intptr_t* base_of_stack_pointer() { -+ return NULL; -+ } -+ void record_base_of_stack_pointer() { -+ } -+ - bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, - bool isInJava); - - bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); - private: - bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); -+public: -+ // These routines are only used on cpu architectures that -+ // have separate register stacks (Itanium). 
-+ static bool register_stack_overflow() { return false; } -+ static void enable_register_stack_guard() {} -+ static void disable_register_stack_guard() {} - - #endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP - -From 28238cf776bd25c9805d9dd686c08fe8d3a1500b Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 15:22:30 +0800 -Subject: [PATCH 083/140] Revert JDK-8253539: Remove unused JavaThread - functions for set_last_Java_fp/pc - ---- - src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp | 3 +++ - src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp | 3 +++ - 2 files changed, 6 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -index 9a6084afa1d..5a0c9b812fc 100644 ---- a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -+++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -@@ -83,4 +83,7 @@ - - intptr_t* last_Java_fp(void) { return _last_Java_fp; } - -+ // Assert (last_Java_sp == NULL || fp == NULL) -+ void set_last_Java_fp(intptr_t* fp) { OrderAccess::release(); _last_Java_fp = fp; } -+ - #endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -index 313a7b932c3..4b91fa855ae 100644 ---- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -@@ -34,6 +34,9 @@ - frame pd_last_frame(); - - public: -+ // Mutators are highly dangerous.... -+ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } -+ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } - - void set_base_of_stack_pointer(intptr_t* base_sp) { - } - -From f9322bb6235b603eac825c6e6751093ada1e6cfe Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 15:45:56 +0800 -Subject: [PATCH 084/140] Revert JDK-8269853: Prefetch::read should accept - pointer to const - ---- - src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp -index a6432c84ec7..2bd48e09c34 100644 ---- a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp -+++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp -@@ -29,7 +29,7 @@ - #include "runtime/prefetch.hpp" - - --inline void Prefetch::read (const void *loc, intx interval) { -+inline void Prefetch::read (void *loc, intx interval) { - } - - inline void Prefetch::write(void *loc, intx interval) { - -From aa6f7320d8d849b8e47b6e77a20257e3d99fd14f Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 16:14:55 +0800 -Subject: [PATCH 085/140] Revert: JDK-8254231: Implementation of Foreign Linker - API (Incubator) JDK-8264774: Implementation of Foreign Function and Memory - API (Incubator) - ---- - .../cpu/riscv/foreign_globals_riscv.cpp | 44 ------------------- - .../cpu/riscv/foreign_globals_riscv.hpp | 32 -------------- - src/hotspot/cpu/riscv/frame_riscv.cpp | 15 ------- - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 12 +---- - src/hotspot/cpu/riscv/riscv.ad | 5 --- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 10 +---- - .../riscv/universalNativeInvoker_riscv.cpp | 33 -------------- - .../cpu/riscv/universalUpcallHandle_riscv.cpp | 42 ------------------ - src/hotspot/cpu/riscv/vmreg_riscv.cpp | 5 --- - 9 files changed, 2 insertions(+), 196 deletions(-) - delete mode 
100644 src/hotspot/cpu/riscv/foreign_globals_riscv.cpp - delete mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.hpp - delete mode 100644 src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp - delete mode 100644 src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp - -diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp -deleted file mode 100644 -index 5c700be9c91..00000000000 ---- a/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp -+++ /dev/null -@@ -1,44 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#include "precompiled.hpp" --#include "prims/foreign_globals.hpp" --#include "utilities/debug.hpp" -- --// Stubbed out, implement later --const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { -- Unimplemented(); -- return {}; --} -- --const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { -- Unimplemented(); -- return {}; --} -- --const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { -- ShouldNotCallThis(); -- return {}; --} -diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp -deleted file mode 100644 -index 3ac89752c27..00000000000 ---- a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp -+++ /dev/null -@@ -1,32 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#ifndef CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP --#define CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP -- --class ABIDescriptor {}; --class BufferLayout {}; -- --#endif // CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 050595389e9..40ec584b994 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -361,21 +361,6 @@ frame frame::sender_for_entry_frame(RegisterMap* map) const { - return fr; - } - --OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { -- ShouldNotCallThis(); -- return nullptr; --} -- --bool frame::optimized_entry_frame_is_first() const { -- ShouldNotCallThis(); -- return false; --} -- --frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { -- ShouldNotCallThis(); -- return {}; --} -- - //------------------------------------------------------------------------------ - // frame::verify_deopt_original_pc - // -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -index 1f7c0c87c21..3bf5cfb16c3 100644 ---- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -181,13 +181,6 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* - return NULL; - } - -- // No need in interpreter entry for linkToNative for now. -- // Interpreter calls compiled entry through i2c. -- if (iid == vmIntrinsics::_linkToNative) { -- __ ebreak(); -- return NULL; -- } -- - // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) - // xmethod: Method* - // x13: argument locator (parameter slot count, added to sp) -@@ -280,10 +273,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, - assert_different_registers(temp1, temp2, temp3, receiver_reg); - assert_different_registers(temp1, temp2, temp3, member_reg); - -- if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { -- if (iid == vmIntrinsics::_linkToNative) { -- assert(for_compiler_entry, "only compiler entry is supported"); -- } -+ if (iid == vmIntrinsics::_invokeBasic) { - // indirect through MH.form.vmentry.vmtarget - jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); - } else { -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 1667994699f..7ec76e72ff0 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -945,11 +945,6 @@ int MachCallRuntimeNode::ret_addr_offset() { - } - } - --int MachCallNativeNode::ret_addr_offset() { -- Unimplemented(); -- return -1; --} -- - // - // Compute padding required for nodes which need alignment - // -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 411bddd2ace..897dafcc99c 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1037,7 +1037,7 @@ static void gen_special_dispatch(MacroAssembler* masm, - member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument - member_reg = x9; // known to be free at this point - has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); -- } else if (iid == 
vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { -+ } else if (iid == vmIntrinsics::_invokeBasic) { - has_receiver = true; - } else { - fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); -@@ -2566,14 +2566,6 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha - } - - #ifdef COMPILER2 --RuntimeStub* SharedRuntime::make_native_invoker(address call_target, -- int shadow_space_bytes, -- const GrowableArray& input_registers, -- const GrowableArray& output_registers) { -- Unimplemented(); -- return nullptr; --} -- - //------------------------------generate_exception_blob--------------------------- - // creates exception blob at the end - // Using exception blob, this code is jumped from a compiled method. -diff --git a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp -deleted file mode 100644 -index 4f50adb05c3..00000000000 ---- a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp -+++ /dev/null -@@ -1,33 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#include "precompiled.hpp" --#include "prims/universalNativeInvoker.hpp" --#include "utilities/debug.hpp" -- --address ProgrammableInvoker::generate_adapter(jobject jabi, jobject jlayout) { -- Unimplemented(); -- return nullptr; --} -diff --git a/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp -deleted file mode 100644 -index ce70da72f2e..00000000000 ---- a/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp -+++ /dev/null -@@ -1,42 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). 
-- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#include "precompiled.hpp" --#include "prims/universalUpcallHandler.hpp" --#include "utilities/debug.hpp" -- --address ProgrammableUpcallHandler::generate_upcall_stub(jobject jrec, jobject jabi, jobject jlayout) { -- Unimplemented(); -- return nullptr; --} -- --address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) { -- ShouldNotCallThis(); -- return nullptr; --} -- --bool ProgrammableUpcallHandler::supports_optimized_upcalls() { -- return false; --} -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -index 1f6eff96cba..5d1187c2a27 100644 ---- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -@@ -49,8 +49,3 @@ void VMRegImpl::set_regName() { - regName[i] = "NON-GPR-FPR"; - } - } -- --VMReg VMRegImpl::vmStorageToVMReg(int type, int index) { -- Unimplemented(); -- return VMRegImpl::Bad(); --} - -From a5889735a97f3712bb649c454dee192d75457f96 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 17:35:20 +0800 -Subject: [PATCH 086/140] Revert JDK-8256254: Convert vmIntrinsics::ID to enum - class - ---- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2 +- - 3 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 006fe49b155..1133e80a210 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -1841,7 +1841,7 @@ void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, - beq(t0, tmp, do_profile); - get_method(tmp); - lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); -- li(t1, static_cast(vmIntrinsics::_compiledLambdaForm)); -+ li(t1, vmIntrinsics::_compiledLambdaForm); - bne(t0, t1, profile_continue); - bind(do_profile); - } -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -index 3bf5cfb16c3..4442b5991b1 100644 ---- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -411,7 +411,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, - } - - default: -- fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); -+ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); - break; - } - -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 897dafcc99c..5b934b04e8e 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1040,7 +1040,7 @@ static void gen_special_dispatch(MacroAssembler* masm, - } else if (iid == vmIntrinsics::_invokeBasic) { - has_receiver = true; - } else { -- fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); -+ fatal("unexpected intrinsic id %d", iid); - } - - if (member_reg != noreg) { - -From 
245d01e2cae27e41b875450f5f92751e4f36a095 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 20:27:58 +0800 -Subject: [PATCH 087/140] Revert JDK-8216557: Aarch64: Add support for - Concurrent Class Unloading - ---- - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 4 - - .../gc/shared/barrierSetAssembler_riscv.cpp | 71 -------- - .../gc/shared/barrierSetAssembler_riscv.hpp | 3 - - .../gc/shared/barrierSetNMethod_riscv.cpp | 171 ------------------ - .../cpu/riscv/macroAssembler_riscv.cpp | 35 +--- - .../cpu/riscv/macroAssembler_riscv.hpp | 2 - - src/hotspot/cpu/riscv/relocInfo_riscv.cpp | 1 - - src/hotspot/cpu/riscv/riscv.ad | 16 -- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 7 - - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 49 ----- - src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 1 - - src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 6 - - 12 files changed, 5 insertions(+), 361 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp - -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index 44ceccd8bd1..a6d1b1470f9 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -322,10 +322,6 @@ void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { - // Note that we do this before creating a frame. - generate_stack_overflow_check(bang_size_in_bytes); - MacroAssembler::build_frame(framesize); -- -- // Insert nmethod entry barrier into frame. -- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -- bs->nmethod_entry_barrier(this); - } - - void C1_MacroAssembler::remove_frame(int framesize) { -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp -index 3c115a2ea02..2b556b95d71 100644 ---- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp -@@ -27,7 +27,6 @@ - #include "classfile/classLoaderData.hpp" - #include "gc/shared/barrierSet.hpp" - #include "gc/shared/barrierSetAssembler.hpp" --#include "gc/shared/barrierSetNMethod.hpp" - #include "gc/shared/collectedHeap.hpp" - #include "interpreter/interp_masm.hpp" - #include "memory/universe.hpp" -@@ -230,73 +229,3 @@ void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, - } - __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); - } -- --void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { -- BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); -- -- if (bs_nm == NULL) { -- return; -- } -- -- // RISCV atomic operations require that the memory address be naturally aligned. -- __ align(4); -- -- Label skip, guard; -- Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset())); -- -- __ lwu(t0, guard); -- -- // Subsequent loads of oops must occur after load of guard value. -- // BarrierSetNMethod::disarm sets guard with release semantics. -- __ membar(MacroAssembler::LoadLoad); -- __ lwu(t1, thread_disarmed_addr); -- __ beq(t0, t1, skip); -- -- int32_t offset = 0; -- __ movptr_with_offset(t0, StubRoutines::riscv::method_entry_barrier(), offset); -- __ jalr(ra, t0, offset); -- __ j(skip); -- -- __ bind(guard); -- -- assert(__ offset() % 4 == 0, "bad alignment"); -- __ emit_int32(0); // nmethod guard value. Skipped over in common case. 
-- -- __ bind(skip); --} -- --void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { -- BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod(); -- if (bs == NULL) { -- return; -- } -- -- Label bad_call; -- __ beqz(xmethod, bad_call); -- -- // Pointer chase to the method holder to find out if the method is concurrently unloading. -- Label method_live; -- __ load_method_holder_cld(t0, xmethod); -- -- // Is it a strong CLD? -- __ lwu(t1, Address(t0, ClassLoaderData::keep_alive_offset())); -- __ bnez(t1, method_live); -- -- // Is it a weak but alive CLD? -- __ push_reg(RegSet::of(x28, x29), sp); -- -- __ ld(x28, Address(t0, ClassLoaderData::holder_offset())); -- -- // Uses x28 & x29, so we must pass new temporaries. -- __ resolve_weak_handle(x28, x29); -- __ mv(t0, x28); -- -- __ pop_reg(RegSet::of(x28, x29), sp); -- -- __ bnez(t0, method_live); -- -- __ bind(bad_call); -- -- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -- __ bind(method_live); --} -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp -index b85f7f5582b..984d94f4c3d 100644 ---- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp -@@ -28,7 +28,6 @@ - - #include "asm/macroAssembler.hpp" - #include "gc/shared/barrierSet.hpp" --#include "gc/shared/barrierSetNMethod.hpp" - #include "memory/allocation.hpp" - #include "oops/access.hpp" - -@@ -71,8 +70,6 @@ class BarrierSetAssembler: public CHeapObj { - ); - virtual void barrier_stubs_init() {} - -- virtual void nmethod_entry_barrier(MacroAssembler* masm); -- virtual void c2i_entry_barrier(MacroAssembler* masm); - virtual ~BarrierSetAssembler() {} - }; - -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp -deleted file mode 100644 -index ae7ee4c5a44..00000000000 ---- a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp -+++ /dev/null -@@ -1,171 +0,0 @@ --/* -- * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#include "precompiled.hpp" --#include "code/codeCache.hpp" --#include "code/nativeInst.hpp" --#include "gc/shared/barrierSetNMethod.hpp" --#include "logging/log.hpp" --#include "memory/resourceArea.hpp" --#include "runtime/sharedRuntime.hpp" --#include "runtime/registerMap.hpp" --#include "runtime/thread.hpp" --#include "utilities/align.hpp" --#include "utilities/debug.hpp" -- --class NativeNMethodBarrier: public NativeInstruction { -- address instruction_address() const { return addr_at(0); } -- -- int *guard_addr() { -- /* auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j */ -- return reinterpret_cast(instruction_address() + 12 * 4); -- } -- --public: -- int get_value() { -- return Atomic::load_acquire(guard_addr()); -- } -- -- void set_value(int value) { -- Atomic::release_store(guard_addr(), value); -- } -- -- void verify() const; --}; -- --// Store the instruction bitmask, bits and name for checking the barrier. --struct CheckInsn { -- uint32_t mask; -- uint32_t bits; -- const char *name; --}; -- --static const struct CheckInsn barrierInsn[] = { -- { 0x00000fff, 0x00000297, "auipc t0, 0 "}, -- { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) "}, -- { 0xffffffff, 0x0aa0000f, "fence ir, ir "}, -- { 0x000fffff, 0x000be303, "lwu t1, 112(xthread)"}, -- { 0x01fff07f, 0x00628063, "beq t0, t1, skip "}, -- { 0x00000fff, 0x000002b7, "lui t0, imm0 "}, -- { 0x000fffff, 0x00028293, "addi t0, t0, imm1 "}, -- { 0xffffffff, 0x00b29293, "slli t0, t0, 11 "}, -- { 0x000fffff, 0x00028293, "addi t0, t0, imm2 "}, -- { 0xffffffff, 0x00529293, "slli t0, t0, 5 "}, -- { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) "}, -- { 0x00000fff, 0x0000006f, "j skip "} -- /* guard: */ -- /* 32bit nmethod guard value */ -- /* skip: */ --}; -- --// The encodings must match the instructions emitted by --// BarrierSetAssembler::nmethod_entry_barrier. The matching ignores the specific --// register numbers and immediate values in the encoding. --void NativeNMethodBarrier::verify() const { -- intptr_t addr = (intptr_t) instruction_address(); -- for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) { -- uint32_t inst = *((uint32_t*) addr); -- if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) { -- tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst); -- fatal("not an %s instruction.", barrierInsn[i].name); -- } -- addr += 4; -- } --} -- -- --/* We're called from an nmethod when we need to deoptimize it. We do -- this by throwing away the nmethod's frame and jumping to the -- ic_miss stub. This looks like there has been an IC miss at the -- entry of the nmethod, so we resolve the call, which will fall back -- to the interpreter if the nmethod has been unloaded. 
*/ --void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { -- -- typedef struct { -- intptr_t *sp; intptr_t *fp; address ra; address pc; -- } frame_pointers_t; -- -- frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5); -- -- JavaThread *thread = JavaThread::current(); -- RegisterMap reg_map(thread, false); -- frame frame = thread->last_frame(); -- -- assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be"); -- assert(frame.cb() == nm, "must be"); -- frame = frame.sender(®_map); -- -- LogTarget(Trace, nmethod, barrier) out; -- if (out.is_enabled()) { -- ResourceMark mark; -- log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p", -- nm->method()->name_and_sig_as_C_string(), -- nm, *(address *) return_address_ptr, nm->is_osr_method(), thread, -- thread->name(), frame.sp(), nm->verified_entry_point()); -- } -- -- new_frame->sp = frame.sp(); -- new_frame->fp = frame.fp(); -- new_frame->ra = frame.pc(); -- new_frame->pc = SharedRuntime::get_handle_wrong_method_stub(); --} -- --// This is the offset of the entry barrier from where the frame is completed. --// If any code changes between the end of the verified entry where the entry --// barrier resides, and the completion of the frame, then --// NativeNMethodCmpBarrier::verify() will immediately complain when it does --// not find the expected native instruction at this offset, which needs updating. --// Note that this offset is invariant of PreserveFramePointer. -- --// see BarrierSetAssembler::nmethod_entry_barrier --// auipc + lwu + fence + lwu + beq + movptr_with_offset(5 instructions) + jalr + j + int32 --static const int entry_barrier_offset = -4 * 13; -- --static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) { -- address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset; -- NativeNMethodBarrier* barrier = reinterpret_cast(barrier_address); -- debug_only(barrier->verify()); -- return barrier; --} -- --void BarrierSetNMethod::disarm(nmethod* nm) { -- if (!supports_entry_barrier(nm)) { -- return; -- } -- -- // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier. -- NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); -- -- barrier->set_value(disarmed_value()); --} -- --bool BarrierSetNMethod::is_armed(nmethod* nm) { -- if (!supports_entry_barrier(nm)) { -- return false; -- } -- -- NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); -- return barrier->get_value() != disarmed_value(); --} -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 41a415ef2cf..a75bd9dfa89 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1638,10 +1638,10 @@ void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, - beq(trial_klass, tmp, L); - } - --// Move an oop into a register. immediate is true if we want --// immediate instructions and nmethod entry barriers are not enabled. --// i.e. we are not going to patch this instruction while the code is being --// executed by another thread. -+// Move an oop into a register. immediate is true if we want -+// immediate instructions, i.e. we are not going to patch this -+// instruction while the code is being executed by another thread. In -+// that case we can use move immediates rather than the constant pool. 
- void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { - int oop_index; - if (obj == NULL) { -@@ -1656,11 +1656,7 @@ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { - oop_index = oop_recorder()->find_index(obj); - } - RelocationHolder rspec = oop_Relocation::spec(oop_index); -- -- // nmethod entry barrier necessitate using the constant pool. They have to be -- // ordered with respected to oop access. -- // Using immediate literals would necessitate fence.i. -- if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) { -+ if (!immediate) { - address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address - ld_constant(dst, Address(dummy, rspec)); - } else -@@ -1738,22 +1734,6 @@ void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { - access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); - } - --// ((WeakHandle)result).resolve() --void MacroAssembler::resolve_weak_handle(Register result, Register tmp) { -- assert_different_registers(result, tmp); -- Label resolved; -- -- // A null weak handle resolves to null. -- beqz(result, resolved); -- -- // Only 64 bit platforms support GCs that require a tmp register -- // Only IN_HEAP loads require a thread_tmp register -- // WeakHandle::resolve is an indirection like jweak. -- access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, -- result, Address(result), tmp, noreg /* tmp_thread */); -- bind(resolved); --} -- - void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, - Register dst, Address src, - Register tmp1, Register thread_tmp) { -@@ -3195,11 +3175,6 @@ void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { - beq(src1, t0, equal); - } - --void MacroAssembler::load_method_holder_cld(Register result, Register method) { -- load_method_holder(result, method); -- ld(result, Address(result, InstanceKlass::class_loader_data_offset())); --} -- - void MacroAssembler::load_method_holder(Register holder, Register method) { - ld(holder, Address(method, Method::const_offset())); // ConstMethod* - ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index dd39f67d507..b16fe904888 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -207,7 +207,6 @@ class MacroAssembler: public Assembler { - virtual void check_and_handle_earlyret(Register java_thread); - virtual void check_and_handle_popframe(Register java_thread); - -- void resolve_weak_handle(Register result, Register tmp); - void resolve_oop_handle(Register result, Register tmp = x15); - void resolve_jobject(Register value, Register thread, Register tmp); - -@@ -673,7 +672,6 @@ class MacroAssembler: public Assembler { - void cmpptr(Register src1, Address src2, Label& equal); - - void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); -- void load_method_holder_cld(Register result, Register method); - void load_method_holder(Register holder, Register method); - - void compute_index(Register str1, Register trailing_zeros, Register match_mask, -diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -index 228a64eae2c..047ea2276ca 100644 ---- a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -@@ -41,7 +41,6 @@ 
void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { - switch (type()) { - case relocInfo::oop_type: { - oop_Relocation *reloc = (oop_Relocation *)this; -- // in movoop when BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate - if (NativeInstruction::is_load_pc_relative_at(addr())) { - address constptr = (address)code()->oop_addr_at(reloc->oop_index()); - bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 7ec76e72ff0..0a1838695e1 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1068,17 +1068,6 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - st->print("sd ra, [sp, #%d]\n\t", - wordSize); - if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } - st->print("sub sp, sp, #%d\n\t", framesize); -- -- if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) { -- st->print("ld t0, [guard]\n\t"); -- st->print("membar LoadLoad\n\t"); -- st->print("ld t1, [xthread, #thread_disarmed_offset]\n\t"); -- st->print("beq t0, t1, skip\n\t"); -- st->print("jalr #nmethod_entry_barrier_stub\n\t"); -- st->print("j skip\n\t"); -- st->print("guard: int\n\t"); -- st->print("skip:\n\t"); -- } - } - #endif - -@@ -1114,11 +1103,6 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - - __ build_frame(framesize); - -- if (C->stub_function() == NULL) { -- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -- bs->nmethod_entry_barrier(&_masm); -- } -- - if (VerifyStackAtCalls) { - Unimplemented(); - } -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 5b934b04e8e..326ba62fcb0 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -642,9 +642,6 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm - c2i_no_clinit_check_entry = __ pc(); - } - -- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -- bs->c2i_entry_barrier(masm); -- - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); - - __ flush(); -@@ -1290,10 +1287,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - // -2 because return address is already present and so is saved fp - __ sub(sp, sp, stack_size - 2 * wordSize); - -- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -- assert_cond(bs != NULL); -- bs->nmethod_entry_barrier(masm); -- - // Frame is now completed as far as size and linkage. 
- int frame_complete = ((intptr_t)__ pc()) - start; - -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -index 0c5b0e001ee..74c38c3d044 100644 ---- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -2352,50 +2352,6 @@ class StubGenerator: public StubCodeGenerator { - return entry; - } - -- address generate_method_entry_barrier() { -- __ align(CodeEntryAlignment); -- StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier"); -- -- Label deoptimize_label; -- -- address start = __ pc(); -- -- __ set_last_Java_frame(sp, fp, ra, t0); -- -- __ enter(); -- __ add(t1, sp, wordSize); -- -- __ sub(sp, sp, 4 * wordSize); -- -- __ push_call_clobbered_registers(); -- -- __ mv(c_rarg0, t1); -- __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetNMethod::nmethod_stub_entry_barrier), 1); -- -- __ reset_last_Java_frame(true); -- -- __ mv(t0, x10); -- -- __ pop_call_clobbered_registers(); -- -- __ bnez(t0, deoptimize_label); -- -- __ leave(); -- __ ret(); -- -- __ BIND(deoptimize_label); -- -- __ ld(t0, Address(sp, 0)); -- __ ld(fp, Address(sp, wordSize)); -- __ ld(ra, Address(sp, wordSize * 2)); -- __ ld(t1, Address(sp, wordSize * 3)); -- -- __ mv(sp, t0); -- __ jr(t1); -- -- return start; -- } -- - // x10 = result - // x11 = str1 - // x12 = cnt1 -@@ -3703,11 +3659,6 @@ class StubGenerator: public StubCodeGenerator { - - generate_string_indexof_stubs(); - -- BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); -- if (bs_nm != NULL) { -- StubRoutines::riscv::_method_entry_barrier = generate_method_entry_barrier(); -- } -- - StubRoutines::riscv::set_completed(); - } - -diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -index 395a2d338e4..9202d9ec4b0 100644 ---- a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -@@ -53,6 +53,5 @@ address StubRoutines::riscv::_string_indexof_linear_ll = NULL; - address StubRoutines::riscv::_string_indexof_linear_uu = NULL; - address StubRoutines::riscv::_string_indexof_linear_ul = NULL; - address StubRoutines::riscv::_large_byte_array_inflate = NULL; --address StubRoutines::riscv::_method_entry_barrier = NULL; - - bool StubRoutines::riscv::_completed = false; -diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -index 51f07819c33..0c9445e18a7 100644 ---- a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -@@ -67,8 +67,6 @@ class riscv { - static address _string_indexof_linear_ul; - static address _large_byte_array_inflate; - -- static address _method_entry_barrier; -- - static bool _completed; - - public: -@@ -145,10 +143,6 @@ class riscv { - return _large_byte_array_inflate; - } - -- static address method_entry_barrier() { -- return _method_entry_barrier; -- } -- - static bool complete() { - return _completed; - } - -From aee31440dde84c54449b5c0dbdfb43b4d3826f5a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 17:59:40 +0800 -Subject: [PATCH 088/140] Revert JDK-8223173: Implement fast class - initialization checks on AARCH64 && JDK-8227260: JNI upcalls should bypass - class initialization barrier in c2i adapter - ---- - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 12 ------- - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 12 +++---- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 12 ------- - 
src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 2 -- - .../cpu/riscv/macroAssembler_riscv.cpp | 36 ------------------- - .../cpu/riscv/macroAssembler_riscv.hpp | 3 -- - src/hotspot/cpu/riscv/riscv.ad | 11 ------ - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 30 +--------------- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 17 +++------ - 9 files changed, 11 insertions(+), 124 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 49653d04d81..1e482d7cc2b 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -90,18 +90,6 @@ static void select_different_registers(Register preserve, - - bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } - --void LIR_Assembler::clinit_barrier(ciMethod* method) { -- assert(VM_Version::supports_fast_class_init_checks(), "sanity"); -- assert(!method->holder()->is_not_initialized(), "initialization should have been started"); -- -- Label L_skip_barrier; -- -- __ mov_metadata(t1, method->holder()->constant_encoding()); -- __ clinit_barrier(t1, t0, &L_skip_barrier /* L_fast_path */); -- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -- __ bind(L_skip_barrier); --} -- - LIR_Opr LIR_Assembler::receiverOpr() { - return FrameMap::receiver_opr; - } -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index a6d1b1470f9..99d981f97f4 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -317,6 +317,12 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache, L - } - - void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { -+ // If we have to make this method not-entrant we'll overwrite its -+ // first instruction with a jump. For this action to be legal we -+ // must ensure that this first instruction is a J, JAL or NOP. -+ // Make it a NOP. -+ nop(); -+ - assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); - // Make sure there is enough stack space for this method's activation. - // Note that we do this before creating a frame. -@@ -330,12 +336,6 @@ void C1_MacroAssembler::remove_frame(int framesize) { - - - void C1_MacroAssembler::verified_entry() { -- // If we have to make this method not-entrant we'll overwrite its -- // first instruction with a jump. For this action to be legal we -- // must ensure that this first instruction is a J, JAL or NOP. -- // Make it a NOP. -- -- nop(); - } - - void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 1133e80a210..b50be7e726c 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -295,18 +295,6 @@ void InterpreterMacroAssembler::load_resolved_klass_at_offset( - ld(klass, Address(klass, Array::base_offset_in_bytes())); - } - --void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, -- Register method, -- Register cache) { -- const int method_offset = in_bytes( -- ConstantPoolCache::base_offset() + -- ((byte_no == TemplateTable::f2_byte) -- ? 
ConstantPoolCacheEntry::f2_offset() -- : ConstantPoolCacheEntry::f1_offset())); -- -- ld(method, Address(cache, method_offset)); // get f1 Method* --} -- - // Generate a subtype check: branch to ok_is_subtype if sub_klass is a - // subtype of super_klass. - // -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -index 4d8cb086f82..4126e8ee70f 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -@@ -122,8 +122,6 @@ class InterpreterMacroAssembler: public MacroAssembler { - // Load cpool->resolved_klass_at(index). - void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); - -- void load_resolved_method_at_index(int byte_no, Register method, Register cache); -- - void pop_ptr(Register r = x10); - void pop_i(Register r = x10); - void pop_l(Register r = x10); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index a75bd9dfa89..304b6f2b06c 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -372,36 +372,6 @@ void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thr - sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); - } - --void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { -- assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); -- assert_different_registers(klass, xthread, tmp); -- -- Label L_fallthrough, L_tmp; -- if (L_fast_path == NULL) { -- L_fast_path = &L_fallthrough; -- } else if (L_slow_path == NULL) { -- L_slow_path = &L_fallthrough; -- } -- -- // Fast path check: class is fully initialized -- lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); -- sub(tmp, tmp, InstanceKlass::fully_initialized); -- beqz(tmp, *L_fast_path); -- -- // Fast path check: current thread is initializer thread -- ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); -- -- if (L_slow_path == &L_fallthrough) { -- beq(xthread, tmp, *L_fast_path); -- bind(*L_slow_path); -- } else if (L_fast_path == &L_fallthrough) { -- bne(xthread, tmp, *L_slow_path); -- bind(*L_fast_path); -- } else { -- Unimplemented(); -- } --} -- - void MacroAssembler::verify_oop(Register reg, const char* s) { - if (!VerifyOops) { return; } - -@@ -3175,12 +3145,6 @@ void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { - beq(src1, t0, equal); - } - --void MacroAssembler::load_method_holder(Register holder, Register method) { -- ld(holder, Address(method, Method::const_offset())); // ConstMethod* -- ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* -- ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* --} -- - // string indexof - // compute index by trailing zeros - void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index b16fe904888..c6b71bdbc3c 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -671,9 +671,6 @@ class MacroAssembler: public Assembler { - - void cmpptr(Register src1, Address src2, Label& equal); - -- void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); -- void 
load_method_holder(Register holder, Register method); -- - void compute_index(Register str1, Register trailing_zeros, Register match_mask, - Register result, Register char_tmp, Register tmp, - bool haystack_isL); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 0a1838695e1..13546ab328b 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1085,17 +1085,6 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - - assert_cond(C != NULL); - -- if (C->clinit_barrier_on_entry()) { -- assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); -- -- Label L_skip_barrier; -- -- __ mov_metadata(t1, C->method()->holder()->constant_encoding()); -- __ clinit_barrier(t1, t0, &L_skip_barrier); -- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -- __ bind(L_skip_barrier); -- } -- - int bangsize = C->output()->bang_size_in_bytes(); - if (C->output()->need_stack_bang(bangsize)) { - __ generate_stack_overflow_check(bangsize); -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 326ba62fcb0..ae414224c5b 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -623,29 +623,10 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm - - address c2i_entry = __ pc(); - -- // Class initialization barrier for static methods -- address c2i_no_clinit_check_entry = NULL; -- if (VM_Version::supports_fast_class_init_checks()) { -- Label L_skip_barrier; -- -- { // Bypass the barrier for non-static methods -- __ lwu(t0, Address(xmethod, Method::access_flags_offset())); -- __ andi(t1, t0, JVM_ACC_STATIC); -- __ beqz(t1, L_skip_barrier); // non-static -- } -- -- __ load_method_holder(t1, xmethod); -- __ clinit_barrier(t1, t0, &L_skip_barrier); -- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -- -- __ bind(L_skip_barrier); -- c2i_no_clinit_check_entry = __ pc(); -- } -- - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); - - __ flush(); -- return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); -+ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); - } - - int SharedRuntime::c_calling_convention(const BasicType *sig_bt, -@@ -1270,15 +1251,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - // first instruction with a jump. 
- __ nop(); - -- if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { -- Label L_skip_barrier; -- __ mov_metadata(t1, method->method_holder()); // InstanceKlass* -- __ clinit_barrier(t1, t0, &L_skip_barrier); -- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -- -- __ bind(L_skip_barrier); -- } -- - // Generate stack overflow check - __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size())); - -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index bb20f228447..1f4409a9c9a 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -2307,7 +2307,7 @@ void TemplateTable::resolve_cache_and_index(int byte_no, - const Register temp = x9; - assert_different_registers(Rcache, index, temp); - -- Label resolved, clinit_barrier_slow; -+ Label resolved; - - Bytecodes::Code code = bytecode(); - switch (code) { -@@ -2321,10 +2321,6 @@ void TemplateTable::resolve_cache_and_index(int byte_no, - __ mv(t0, (int) code); - __ beq(temp, t0, resolved); - -- // resolve first time through -- // Class initialization barrier slow path lands here as well. -- __ bind(clinit_barrier_slow); -- - address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); - __ mv(temp, (int) code); - __ call_VM(noreg, entry, temp); -@@ -2334,13 +2330,6 @@ void TemplateTable::resolve_cache_and_index(int byte_no, - // n.b. unlike x86 Rcache is now rcpool plus the indexed offset - // so all clients ofthis method must be modified accordingly - __ bind(resolved); -- -- // Class initialization barrier for static methods -- if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { -- __ load_resolved_method_at_index(byte_no, temp, Rcache); -- __ load_method_holder(temp, temp); -- __ clinit_barrier(temp, t0, NULL, &clinit_barrier_slow); -- } - } - - // The Rcache and index registers must be set before call -@@ -3431,7 +3420,9 @@ void TemplateTable::invokeinterface(int byte_no) { - __ profile_virtual_call(x13, x30, x9); - - // Get declaring interface class from method, and itable index -- __ load_method_holder(x10, xmethod); -+ __ ld(x10, Address(xmethod, Method::const_offset())); -+ __ ld(x10, Address(x10, ConstMethod::constants_offset())); -+ __ ld(x10, Address(x10, ConstantPool::pool_holder_offset_in_bytes())); - __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); - __ subw(xmethod, xmethod, Method::itable_index_max); - __ negw(xmethod, xmethod); - -From c259a42eac0a11e080d28dabe7f745ee79a53663 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 18:36:13 +0800 -Subject: [PATCH 089/140] Revert JDK-8268119: Rename copy_os_cpu.inline.hpp - files to copy_os_cpu.hpp && JDK-8142362: Lots of code duplication in Copy - class - ---- - src/hotspot/cpu/riscv/copy_riscv.hpp | 85 +----------- - .../os_cpu/linux_riscv/copy_linux_riscv.hpp | 31 ----- - .../linux_riscv/copy_linux_riscv.inline.hpp | 124 ++++++++++++++++++ - 3 files changed, 128 insertions(+), 112 deletions(-) - delete mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp - -diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp -index bceadcc5dcc..05da242e354 100644 ---- a/src/hotspot/cpu/riscv/copy_riscv.hpp -+++ b/src/hotspot/cpu/riscv/copy_riscv.hpp -@@ -27,7 +27,10 @@ - #ifndef 
CPU_RISCV_COPY_RISCV_HPP - #define CPU_RISCV_COPY_RISCV_HPP - --#include OS_CPU_HEADER(copy) -+// Inline functions for memory copy and fill. -+ -+// Contains inline asm implementations -+#include OS_CPU_HEADER_INLINE(copy) - - static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { - julong* to = (julong*) tohw; -@@ -53,84 +56,4 @@ static void pd_zero_to_bytes(void* to, size_t count) { - (void)memset(to, 0, count); - } - --static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -- (void)memmove(to, from, count * HeapWordSize); --} -- --static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -- switch (count) { -- case 8: to[7] = from[7]; // fall through -- case 7: to[6] = from[6]; // fall through -- case 6: to[5] = from[5]; // fall through -- case 5: to[4] = from[4]; // fall through -- case 4: to[3] = from[3]; // fall through -- case 3: to[2] = from[2]; // fall through -- case 2: to[1] = from[1]; // fall through -- case 1: to[0] = from[0]; // fall through -- case 0: break; -- default: -- memcpy(to, from, count * HeapWordSize); -- break; -- } --} -- --static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { -- shared_disjoint_words_atomic(from, to, count); --} -- --static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -- pd_conjoint_words(from, to, count); --} -- --static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -- pd_disjoint_words(from, to, count); --} -- --static void pd_conjoint_bytes(const void* from, void* to, size_t count) { -- (void)memmove(to, from, count); --} -- --static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { -- pd_conjoint_bytes(from, to, count); --} -- --static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { -- _Copy_conjoint_jshorts_atomic(from, to, count); --} -- --static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { -- _Copy_conjoint_jints_atomic(from, to, count); --} -- --static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { -- _Copy_conjoint_jlongs_atomic(from, to, count); --} -- --static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { -- assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); -- _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); --} -- --static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { -- _Copy_arrayof_conjoint_bytes(from, to, count); --} -- --static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { -- _Copy_arrayof_conjoint_jshorts(from, to, count); --} -- --static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { -- _Copy_arrayof_conjoint_jints(from, to, count); --} -- --static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { -- _Copy_arrayof_conjoint_jlongs(from, to, count); --} -- --static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { -- assert(!UseCompressedOops, "foo!"); -- assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); -- _Copy_arrayof_conjoint_jlongs(from, to, count); --} -- - #endif // CPU_RISCV_COPY_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp -deleted file mode 100644 -index 
147cfdf3c10..00000000000 ---- a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp -+++ /dev/null -@@ -1,31 +0,0 @@ --/* -- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP --#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP -- --// Empty for build system -- --#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp -new file mode 100644 -index 00000000000..bdf36d6b4c3 ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp -@@ -0,0 +1,124 @@ -+/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP -+#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP -+ -+static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ (void)memmove(to, from, count * HeapWordSize); -+} -+ -+static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ switch (count) { -+ case 8: to[7] = from[7]; // fall through -+ case 7: to[6] = from[6]; // fall through -+ case 6: to[5] = from[5]; // fall through -+ case 5: to[4] = from[4]; // fall through -+ case 4: to[3] = from[3]; // fall through -+ case 3: to[2] = from[2]; // fall through -+ case 2: to[1] = from[1]; // fall through -+ case 1: to[0] = from[0]; // fall through -+ case 0: break; -+ default: -+ memcpy(to, from, count * HeapWordSize); -+ break; -+ } -+} -+ -+static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { -+ switch (count) { -+ case 8: to[7] = from[7]; -+ case 7: to[6] = from[6]; -+ case 6: to[5] = from[5]; -+ case 5: to[4] = from[4]; -+ case 4: to[3] = from[3]; -+ case 3: to[2] = from[2]; -+ case 2: to[1] = from[1]; -+ case 1: to[0] = from[0]; -+ case 0: break; -+ default: -+ while (count-- > 0) { -+ *to++ = *from++; -+ } -+ break; -+ } -+} -+ -+static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_conjoint_words(from, to, count); -+} -+ -+static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words(from, to, count); -+} -+ -+static void pd_conjoint_bytes(const void* from, void* to, size_t count) { -+ (void)memmove(to, from, count); -+} -+ -+static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { -+ pd_conjoint_bytes(from, to, count); -+} -+ -+static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { -+ _Copy_conjoint_jshorts_atomic(from, to, count); -+} -+ -+static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { -+ _Copy_conjoint_jints_atomic(from, to, count); -+} -+ -+static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { -+ _Copy_conjoint_jlongs_atomic(from, to, count); -+} -+ -+static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { -+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); -+ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); -+} -+ -+static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_bytes(from, to, count); -+} -+ -+static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jshorts(from, to, count); -+} -+ -+static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jints(from, to, count); -+} -+ -+static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jlongs(from, to, count); -+} -+ -+static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { -+ assert(!UseCompressedOops, "foo!"); -+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); -+ _Copy_arrayof_conjoint_jlongs(from, to, count); -+} -+ -+#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP - -From 6033e30ebd94f2315bf809a42ef00c85bdbc780e Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 19:33:21 +0800 -Subject: [PATCH 090/140] 
Revert JDK-8241436: C2: Factor out C2-specific code - from MacroAssembler - ---- - .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 1321 ----------------- - .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 141 -- - .../cpu/riscv/macroAssembler_riscv.cpp | 1282 ++++++++++++++++ - .../cpu/riscv/macroAssembler_riscv.hpp | 103 ++ - src/hotspot/cpu/riscv/riscv.ad | 124 +- - 5 files changed, 1447 insertions(+), 1524 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp - delete mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp - -diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp -deleted file mode 100644 -index 73f84a724ca..00000000000 ---- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp -+++ /dev/null -@@ -1,1321 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#include "precompiled.hpp" --#include "asm/assembler.hpp" --#include "asm/assembler.inline.hpp" --#include "opto/c2_MacroAssembler.hpp" --#include "opto/intrinsicnode.hpp" --#include "opto/subnode.hpp" --#include "runtime/stubRoutines.hpp" -- --#ifdef PRODUCT --#define BLOCK_COMMENT(str) /* nothing */ --#define STOP(error) stop(error) --#else --#define BLOCK_COMMENT(str) block_comment(str) --#define STOP(error) block_comment(error); stop(error) --#endif -- --#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") -- --// short string --// StringUTF16.indexOfChar --// StringLatin1.indexOfChar --void C2_MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, -- Register ch, Register result, -- bool isL) --{ -- Register ch1 = t0; -- Register index = t1; -- -- BLOCK_COMMENT("string_indexof_char_short {"); -- -- Label LOOP, LOOP1, LOOP4, LOOP8; -- Label MATCH, MATCH1, MATCH2, MATCH3, -- MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; -- -- mv(result, -1); -- mv(index, zr); -- -- bind(LOOP); -- addi(t0, index, 8); -- ble(t0, cnt1, LOOP8); -- addi(t0, index, 4); -- ble(t0, cnt1, LOOP4); -- j(LOOP1); -- -- bind(LOOP8); -- isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -- beq(ch, ch1, MATCH); -- isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -- beq(ch, ch1, MATCH1); -- isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -- beq(ch, ch1, MATCH2); -- isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -- beq(ch, ch1, MATCH3); -- isL ? 
lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); -- beq(ch, ch1, MATCH4); -- isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); -- beq(ch, ch1, MATCH5); -- isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); -- beq(ch, ch1, MATCH6); -- isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); -- beq(ch, ch1, MATCH7); -- addi(index, index, 8); -- addi(str1, str1, isL ? 8 : 16); -- blt(index, cnt1, LOOP); -- j(NOMATCH); -- -- bind(LOOP4); -- isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -- beq(ch, ch1, MATCH); -- isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -- beq(ch, ch1, MATCH1); -- isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -- beq(ch, ch1, MATCH2); -- isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -- beq(ch, ch1, MATCH3); -- addi(index, index, 4); -- addi(str1, str1, isL ? 4 : 8); -- bge(index, cnt1, NOMATCH); -- -- bind(LOOP1); -- isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); -- beq(ch, ch1, MATCH); -- addi(index, index, 1); -- addi(str1, str1, isL ? 1 : 2); -- blt(index, cnt1, LOOP1); -- j(NOMATCH); -- -- bind(MATCH1); -- addi(index, index, 1); -- j(MATCH); -- -- bind(MATCH2); -- addi(index, index, 2); -- j(MATCH); -- -- bind(MATCH3); -- addi(index, index, 3); -- j(MATCH); -- -- bind(MATCH4); -- addi(index, index, 4); -- j(MATCH); -- -- bind(MATCH5); -- addi(index, index, 5); -- j(MATCH); -- -- bind(MATCH6); -- addi(index, index, 6); -- j(MATCH); -- -- bind(MATCH7); -- addi(index, index, 7); -- -- bind(MATCH); -- mv(result, index); -- bind(NOMATCH); -- BLOCK_COMMENT("} string_indexof_char_short"); --} -- --// StringUTF16.indexOfChar --// StringLatin1.indexOfChar --void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, -- Register ch, Register result, -- Register tmp1, Register tmp2, -- Register tmp3, Register tmp4, -- bool isL) --{ -- Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; -- Register ch1 = t0; -- Register orig_cnt = t1; -- Register mask1 = tmp3; -- Register mask2 = tmp2; -- Register match_mask = tmp1; -- Register trailing_char = tmp4; -- Register unaligned_elems = tmp4; -- -- BLOCK_COMMENT("string_indexof_char {"); -- beqz(cnt1, NOMATCH); -- -- addi(t0, cnt1, isL ? -32 : -16); -- bgtz(t0, DO_LONG); -- string_indexof_char_short(str1, cnt1, ch, result, isL); -- j(DONE); -- -- bind(DO_LONG); -- mv(orig_cnt, cnt1); -- if (AvoidUnalignedAccesses) { -- Label ALIGNED; -- andi(unaligned_elems, str1, 0x7); -- beqz(unaligned_elems, ALIGNED); -- sub(unaligned_elems, unaligned_elems, 8); -- neg(unaligned_elems, unaligned_elems); -- if (!isL) { -- srli(unaligned_elems, unaligned_elems, 1); -- } -- // do unaligned part per element -- string_indexof_char_short(str1, unaligned_elems, ch, result, isL); -- bgez(result, DONE); -- mv(orig_cnt, cnt1); -- sub(cnt1, cnt1, unaligned_elems); -- bind(ALIGNED); -- } -- -- // duplicate ch -- if (isL) { -- slli(ch1, ch, 8); -- orr(ch, ch1, ch); -- } -- slli(ch1, ch, 16); -- orr(ch, ch1, ch); -- slli(ch1, ch, 32); -- orr(ch, ch1, ch); -- -- if (!isL) { -- slli(cnt1, cnt1, 1); -- } -- -- uint64_t mask0101 = UCONST64(0x0101010101010101); -- uint64_t mask0001 = UCONST64(0x0001000100010001); -- mv(mask1, isL ? mask0101 : mask0001); -- uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); -- uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); -- mv(mask2, isL ? 
mask7f7f : mask7fff); -- -- bind(CH1_LOOP); -- ld(ch1, Address(str1)); -- addi(str1, str1, 8); -- addi(cnt1, cnt1, -8); -- compute_match_mask(ch1, ch, match_mask, mask1, mask2); -- bnez(match_mask, HIT); -- bgtz(cnt1, CH1_LOOP); -- j(NOMATCH); -- -- bind(HIT); -- ctzc_bit(trailing_char, match_mask, isL, ch1, result); -- srli(trailing_char, trailing_char, 3); -- addi(cnt1, cnt1, 8); -- ble(cnt1, trailing_char, NOMATCH); -- // match case -- if (!isL) { -- srli(cnt1, cnt1, 1); -- srli(trailing_char, trailing_char, 1); -- } -- -- sub(result, orig_cnt, cnt1); -- add(result, result, trailing_char); -- j(DONE); -- -- bind(NOMATCH); -- mv(result, -1); -- -- bind(DONE); -- BLOCK_COMMENT("} string_indexof_char"); --} -- --typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); -- --// Search for needle in haystack and return index or -1 --// x10: result --// x11: haystack --// x12: haystack_len --// x13: needle --// x14: needle_len --void C2_MacroAssembler::string_indexof(Register haystack, Register needle, -- Register haystack_len, Register needle_len, -- Register tmp1, Register tmp2, -- Register tmp3, Register tmp4, -- Register tmp5, Register tmp6, -- Register result, int ae) --{ -- assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); -- -- Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; -- -- Register ch1 = t0; -- Register ch2 = t1; -- Register nlen_tmp = tmp1; // needle len tmp -- Register hlen_tmp = tmp2; // haystack len tmp -- Register result_tmp = tmp4; -- -- bool isLL = ae == StrIntrinsicNode::LL; -- -- bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -- bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -- int needle_chr_shift = needle_isL ? 0 : 1; -- int haystack_chr_shift = haystack_isL ? 0 : 1; -- int needle_chr_size = needle_isL ? 1 : 2; -- int haystack_chr_size = haystack_isL ? 1 : 2; -- load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -- (load_chr_insn)&MacroAssembler::lhu; -- load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -- (load_chr_insn)&MacroAssembler::lhu; -- -- BLOCK_COMMENT("string_indexof {"); -- -- // Note, inline_string_indexOf() generates checks: -- // if (pattern.count > src.count) return -1; -- // if (pattern.count == 0) return 0; -- -- // We have two strings, a source string in haystack, haystack_len and a pattern string -- // in needle, needle_len. Find the first occurence of pattern in source or return -1. -- -- // For larger pattern and source we use a simplified Boyer Moore algorithm. -- // With a small pattern and source we use linear scan. -- -- // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. -- sub(result_tmp, haystack_len, needle_len); -- // needle_len < 8, use linear scan -- sub(t0, needle_len, 8); -- bltz(t0, LINEARSEARCH); -- // needle_len >= 256, use linear scan -- sub(t0, needle_len, 256); -- bgez(t0, LINEARSTUB); -- // needle_len >= haystack_len/4, use linear scan -- srli(t0, haystack_len, 2); -- bge(needle_len, t0, LINEARSTUB); -- -- // Boyer-Moore-Horspool introduction: -- // The Boyer Moore alogorithm is based on the description here:- -- // -- // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm -- // -- // This describes and algorithm with 2 shift rules. The 'Bad Character' rule -- // and the 'Good Suffix' rule. 
-- // -- // These rules are essentially heuristics for how far we can shift the -- // pattern along the search string. -- // -- // The implementation here uses the 'Bad Character' rule only because of the -- // complexity of initialisation for the 'Good Suffix' rule. -- // -- // This is also known as the Boyer-Moore-Horspool algorithm: -- // -- // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm -- // -- // #define ASIZE 256 -- // -- // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { -- // int i, j; -- // unsigned c; -- // unsigned char bc[ASIZE]; -- // -- // /* Preprocessing */ -- // for (i = 0; i < ASIZE; ++i) -- // bc[i] = m; -- // for (i = 0; i < m - 1; ) { -- // c = pattern[i]; -- // ++i; -- // // c < 256 for Latin1 string, so, no need for branch -- // #ifdef PATTERN_STRING_IS_LATIN1 -- // bc[c] = m - i; -- // #else -- // if (c < ASIZE) bc[c] = m - i; -- // #endif -- // } -- // -- // /* Searching */ -- // j = 0; -- // while (j <= n - m) { -- // c = src[i+j]; -- // if (pattern[m-1] == c) -- // int k; -- // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -- // if (k < 0) return j; -- // // c < 256 for Latin1 string, so, no need for branch -- // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 -- // // LL case: (c< 256) always true. Remove branch -- // j += bc[pattern[j+m-1]]; -- // #endif -- // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF -- // // UU case: need if (c if not. -- // if (c < ASIZE) -- // j += bc[pattern[j+m-1]]; -- // else -- // j += m -- // #endif -- // } -- // return -1; -- // } -- -- // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result -- Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, -- BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; -- -- Register haystack_end = haystack_len; -- Register skipch = tmp2; -- -- // pattern length is >=8, so, we can read at least 1 register for cases when -- // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for -- // UL case. We'll re-read last character in inner pre-loop code to have -- // single outer pre-loop load -- const int firstStep = isLL ? 
7 : 3; -- -- const int ASIZE = 256; -- const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) -- -- sub(sp, sp, ASIZE); -- -- // init BC offset table with default value: needle_len -- slli(t0, needle_len, 8); -- orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] -- slli(tmp1, t0, 16); -- orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] -- slli(tmp1, t0, 32); -- orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] -- -- mv(ch1, sp); // ch1 is t0 -- mv(tmp6, ASIZE / STORE_BYTES); // loop iterations -- -- bind(BM_INIT_LOOP); -- // for (i = 0; i < ASIZE; ++i) -- // bc[i] = m; -- for (int i = 0; i < 4; i++) { -- sd(tmp5, Address(ch1, i * wordSize)); -- } -- add(ch1, ch1, 32); -- sub(tmp6, tmp6, 4); -- bgtz(tmp6, BM_INIT_LOOP); -- -- sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern -- Register orig_haystack = tmp5; -- mv(orig_haystack, haystack); -- // result_tmp = tmp4 -- shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); -- sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 -- mv(tmp3, needle); -- -- // for (i = 0; i < m - 1; ) { -- // c = pattern[i]; -- // ++i; -- // // c < 256 for Latin1 string, so, no need for branch -- // #ifdef PATTERN_STRING_IS_LATIN1 -- // bc[c] = m - i; -- // #else -- // if (c < ASIZE) bc[c] = m - i; -- // #endif -- // } -- bind(BCLOOP); -- (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); -- add(tmp3, tmp3, needle_chr_size); -- if (!needle_isL) { -- // ae == StrIntrinsicNode::UU -- mv(tmp6, ASIZE); -- bgeu(ch1, tmp6, BCSKIP); -- } -- add(tmp4, sp, ch1); -- sb(ch2, Address(tmp4)); // store skip offset to BC offset table -- -- bind(BCSKIP); -- sub(ch2, ch2, 1); // for next pattern element, skip distance -1 -- bgtz(ch2, BCLOOP); -- -- // tmp6: pattern end, address after needle -- shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); -- if (needle_isL == haystack_isL) { -- // load last 8 bytes (8LL/4UU symbols) -- ld(tmp6, Address(tmp6, -wordSize)); -- } else { -- // UL: from UTF-16(source) search Latin1(pattern) -- lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) -- // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d -- // We'll have to wait until load completed, but it's still faster than per-character loads+checks -- srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a -- slli(ch2, tmp6, XLEN - 24); -- srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b -- slli(ch1, tmp6, XLEN - 16); -- srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c -- andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d -- slli(ch2, ch2, 16); -- orr(ch2, ch2, ch1); // 0x00000b0c -- slli(result, tmp3, 48); // use result as temp register -- orr(tmp6, tmp6, result); // 0x0a00000d -- slli(result, ch2, 16); -- orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d -- } -- -- // i = m - 1; -- // skipch = j + i; -- // if (skipch == pattern[m - 1] -- // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -- // else -- // move j with bad char offset table -- bind(BMLOOPSTR2); -- // compare pattern to source string backward -- shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); -- (this->*haystack_load_1chr)(skipch, Address(result), noreg); -- sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 -- if (needle_isL == haystack_isL) { -- // re-init tmp3. It's for free because it's executed in parallel with -- // load above. 
Alternative is to initialize it before loop, but it'll -- // affect performance on in-order systems with 2 or more ld/st pipelines -- srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] -- } -- if (!isLL) { // UU/UL case -- slli(ch2, nlen_tmp, 1); // offsets in bytes -- } -- bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char -- add(result, haystack, isLL ? nlen_tmp : ch2); -- ld(ch2, Address(result)); // load 8 bytes from source string -- mv(ch1, tmp6); -- if (isLL) { -- j(BMLOOPSTR1_AFTER_LOAD); -- } else { -- sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 -- j(BMLOOPSTR1_CMP); -- } -- -- bind(BMLOOPSTR1); -- shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); -- (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -- shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); -- (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -- -- bind(BMLOOPSTR1_AFTER_LOAD); -- sub(nlen_tmp, nlen_tmp, 1); -- bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); -- -- bind(BMLOOPSTR1_CMP); -- beq(ch1, ch2, BMLOOPSTR1); -- -- bind(BMSKIP); -- if (!isLL) { -- // if we've met UTF symbol while searching Latin1 pattern, then we can -- // skip needle_len symbols -- if (needle_isL != haystack_isL) { -- mv(result_tmp, needle_len); -- } else { -- mv(result_tmp, 1); -- } -- mv(t0, ASIZE); -- bgeu(skipch, t0, BMADV); -- } -- add(result_tmp, sp, skipch); -- lbu(result_tmp, Address(result_tmp)); // load skip offset -- -- bind(BMADV); -- sub(nlen_tmp, needle_len, 1); -- // move haystack after bad char skip offset -- shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); -- ble(haystack, haystack_end, BMLOOPSTR2); -- add(sp, sp, ASIZE); -- j(NOMATCH); -- -- bind(BMLOOPSTR1_LASTCMP); -- bne(ch1, ch2, BMSKIP); -- -- bind(BMMATCH); -- sub(result, haystack, orig_haystack); -- if (!haystack_isL) { -- srli(result, result, 1); -- } -- add(sp, sp, ASIZE); -- j(DONE); -- -- bind(LINEARSTUB); -- sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm -- bltz(t0, LINEARSEARCH); -- mv(result, zr); -- RuntimeAddress stub = NULL; -- if (isLL) { -- stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); -- assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); -- } else if (needle_isL) { -- stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); -- assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); -- } else { -- stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); -- assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); -- } -- trampoline_call(stub); -- j(DONE); -- -- bind(NOMATCH); -- mv(result, -1); -- j(DONE); -- -- bind(LINEARSEARCH); -- string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); -- -- bind(DONE); -- BLOCK_COMMENT("} string_indexof"); --} -- --// string_indexof --// result: x10 --// src: x11 --// src_count: x12 --// pattern: x13 --// pattern_count: x14 or 1/2/3/4 --void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, -- Register haystack_len, Register needle_len, -- Register tmp1, Register tmp2, -- Register tmp3, Register tmp4, -- int needle_con_cnt, Register result, int ae) --{ -- // Note: -- // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant -- // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 -- assert(needle_con_cnt <= 4, "Invalid 
needle constant count"); -- assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); -- -- Register ch1 = t0; -- Register ch2 = t1; -- Register hlen_neg = haystack_len, nlen_neg = needle_len; -- Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; -- -- bool isLL = ae == StrIntrinsicNode::LL; -- -- bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -- bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -- int needle_chr_shift = needle_isL ? 0 : 1; -- int haystack_chr_shift = haystack_isL ? 0 : 1; -- int needle_chr_size = needle_isL ? 1 : 2; -- int haystack_chr_size = haystack_isL ? 1 : 2; -- -- load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -- (load_chr_insn)&MacroAssembler::lhu; -- load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -- (load_chr_insn)&MacroAssembler::lhu; -- load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; -- load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; -- -- Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; -- -- Register first = tmp3; -- -- if (needle_con_cnt == -1) { -- Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; -- -- sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); -- bltz(t0, DOSHORT); -- -- (this->*needle_load_1chr)(first, Address(needle), noreg); -- slli(t0, needle_len, needle_chr_shift); -- add(needle, needle, t0); -- neg(nlen_neg, t0); -- slli(t0, result_tmp, haystack_chr_shift); -- add(haystack, haystack, t0); -- neg(hlen_neg, t0); -- -- bind(FIRST_LOOP); -- add(t0, haystack, hlen_neg); -- (this->*haystack_load_1chr)(ch2, Address(t0), noreg); -- beq(first, ch2, STR1_LOOP); -- -- bind(STR2_NEXT); -- add(hlen_neg, hlen_neg, haystack_chr_size); -- blez(hlen_neg, FIRST_LOOP); -- j(NOMATCH); -- -- bind(STR1_LOOP); -- add(nlen_tmp, nlen_neg, needle_chr_size); -- add(hlen_tmp, hlen_neg, haystack_chr_size); -- bgez(nlen_tmp, MATCH); -- -- bind(STR1_NEXT); -- add(ch1, needle, nlen_tmp); -- (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -- add(ch2, haystack, hlen_tmp); -- (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -- bne(ch1, ch2, STR2_NEXT); -- add(nlen_tmp, nlen_tmp, needle_chr_size); -- add(hlen_tmp, hlen_tmp, haystack_chr_size); -- bltz(nlen_tmp, STR1_NEXT); -- j(MATCH); -- -- bind(DOSHORT); -- if (needle_isL == haystack_isL) { -- sub(t0, needle_len, 2); -- bltz(t0, DO1); -- bgtz(t0, DO3); -- } -- } -- -- if (needle_con_cnt == 4) { -- Label CH1_LOOP; -- (this->*load_4chr)(ch1, Address(needle), noreg); -- sub(result_tmp, haystack_len, 4); -- slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp -- add(haystack, haystack, tmp3); -- neg(hlen_neg, tmp3); -- -- bind(CH1_LOOP); -- add(ch2, haystack, hlen_neg); -- (this->*load_4chr)(ch2, Address(ch2), noreg); -- beq(ch1, ch2, MATCH); -- add(hlen_neg, hlen_neg, haystack_chr_size); -- blez(hlen_neg, CH1_LOOP); -- j(NOMATCH); -- } -- -- if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { -- Label CH1_LOOP; -- BLOCK_COMMENT("string_indexof DO2 {"); -- bind(DO2); -- (this->*load_2chr)(ch1, Address(needle), noreg); -- if (needle_con_cnt == 2) { -- sub(result_tmp, haystack_len, 2); -- } -- slli(tmp3, result_tmp, haystack_chr_shift); -- add(haystack, haystack, tmp3); -- neg(hlen_neg, tmp3); -- -- bind(CH1_LOOP); -- add(tmp3, haystack, hlen_neg); -- (this->*load_2chr)(ch2, Address(tmp3), noreg); -- beq(ch1, ch2, 
MATCH); -- add(hlen_neg, hlen_neg, haystack_chr_size); -- blez(hlen_neg, CH1_LOOP); -- j(NOMATCH); -- BLOCK_COMMENT("} string_indexof DO2"); -- } -- -- if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { -- Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; -- BLOCK_COMMENT("string_indexof DO3 {"); -- -- bind(DO3); -- (this->*load_2chr)(first, Address(needle), noreg); -- (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); -- if (needle_con_cnt == 3) { -- sub(result_tmp, haystack_len, 3); -- } -- slli(hlen_tmp, result_tmp, haystack_chr_shift); -- add(haystack, haystack, hlen_tmp); -- neg(hlen_neg, hlen_tmp); -- -- bind(FIRST_LOOP); -- add(ch2, haystack, hlen_neg); -- (this->*load_2chr)(ch2, Address(ch2), noreg); -- beq(first, ch2, STR1_LOOP); -- -- bind(STR2_NEXT); -- add(hlen_neg, hlen_neg, haystack_chr_size); -- blez(hlen_neg, FIRST_LOOP); -- j(NOMATCH); -- -- bind(STR1_LOOP); -- add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); -- add(ch2, haystack, hlen_tmp); -- (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -- bne(ch1, ch2, STR2_NEXT); -- j(MATCH); -- BLOCK_COMMENT("} string_indexof DO3"); -- } -- -- if (needle_con_cnt == -1 || needle_con_cnt == 1) { -- Label DO1_LOOP; -- -- BLOCK_COMMENT("string_indexof DO1 {"); -- bind(DO1); -- (this->*needle_load_1chr)(ch1, Address(needle), noreg); -- sub(result_tmp, haystack_len, 1); -- mv(tmp3, result_tmp); -- if (haystack_chr_shift) { -- slli(tmp3, result_tmp, haystack_chr_shift); -- } -- add(haystack, haystack, tmp3); -- neg(hlen_neg, tmp3); -- -- bind(DO1_LOOP); -- add(tmp3, haystack, hlen_neg); -- (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); -- beq(ch1, ch2, MATCH); -- add(hlen_neg, hlen_neg, haystack_chr_size); -- blez(hlen_neg, DO1_LOOP); -- BLOCK_COMMENT("} string_indexof DO1"); -- } -- -- bind(NOMATCH); -- mv(result, -1); -- j(DONE); -- -- bind(MATCH); -- srai(t0, hlen_neg, haystack_chr_shift); -- add(result, result_tmp, t0); -- -- bind(DONE); --} -- --// Compare strings. --void C2_MacroAssembler::string_compare(Register str1, Register str2, -- Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -- Register tmp3, int ae) --{ -- Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, -- DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, -- SHORT_LOOP_START, TAIL_CHECK, L; -- -- const int STUB_THRESHOLD = 64 + 8; -- bool isLL = ae == StrIntrinsicNode::LL; -- bool isLU = ae == StrIntrinsicNode::LU; -- bool isUL = ae == StrIntrinsicNode::UL; -- -- bool str1_isL = isLL || isLU; -- bool str2_isL = isLL || isUL; -- -- // for L strings, 1 byte for 1 character -- // for U strings, 2 bytes for 1 character -- int str1_chr_size = str1_isL ? 1 : 2; -- int str2_chr_size = str2_isL ? 1 : 2; -- int minCharsInWord = isLL ? wordSize : wordSize / 2; -- -- load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -- load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -- -- BLOCK_COMMENT("string_compare {"); -- -- // Bizzarely, the counts are passed in bytes, regardless of whether they -- // are L or U strings, however the result is always in characters. -- if (!str1_isL) { -- sraiw(cnt1, cnt1, 1); -- } -- if (!str2_isL) { -- sraiw(cnt2, cnt2, 1); -- } -- -- // Compute the minimum of the string lengths and save the difference in result. 
-- sub(result, cnt1, cnt2); -- bgt(cnt1, cnt2, L); -- mv(cnt2, cnt1); -- bind(L); -- -- // A very short string -- li(t0, minCharsInWord); -- ble(cnt2, t0, SHORT_STRING); -- -- // Compare longwords -- // load first parts of strings and finish initialization while loading -- { -- if (str1_isL == str2_isL) { // LL or UU -- // load 8 bytes once to compare -- ld(tmp1, Address(str1)); -- beq(str1, str2, DONE); -- ld(tmp2, Address(str2)); -- li(t0, STUB_THRESHOLD); -- bge(cnt2, t0, STUB); -- sub(cnt2, cnt2, minCharsInWord); -- beqz(cnt2, TAIL_CHECK); -- // convert cnt2 from characters to bytes -- if (!str1_isL) { -- slli(cnt2, cnt2, 1); -- } -- add(str2, str2, cnt2); -- add(str1, str1, cnt2); -- sub(cnt2, zr, cnt2); -- } else if (isLU) { // LU case -- lwu(tmp1, Address(str1)); -- ld(tmp2, Address(str2)); -- li(t0, STUB_THRESHOLD); -- bge(cnt2, t0, STUB); -- addi(cnt2, cnt2, -4); -- add(str1, str1, cnt2); -- sub(cnt1, zr, cnt2); -- slli(cnt2, cnt2, 1); -- add(str2, str2, cnt2); -- inflate_lo32(tmp3, tmp1); -- mv(tmp1, tmp3); -- sub(cnt2, zr, cnt2); -- addi(cnt1, cnt1, 4); -- } else { // UL case -- ld(tmp1, Address(str1)); -- lwu(tmp2, Address(str2)); -- li(t0, STUB_THRESHOLD); -- bge(cnt2, t0, STUB); -- addi(cnt2, cnt2, -4); -- slli(t0, cnt2, 1); -- sub(cnt1, zr, t0); -- add(str1, str1, t0); -- add(str2, str2, cnt2); -- inflate_lo32(tmp3, tmp2); -- mv(tmp2, tmp3); -- sub(cnt2, zr, cnt2); -- addi(cnt1, cnt1, 8); -- } -- addi(cnt2, cnt2, isUL ? 4 : 8); -- bgez(cnt2, TAIL); -- xorr(tmp3, tmp1, tmp2); -- bnez(tmp3, DIFFERENCE); -- -- // main loop -- bind(NEXT_WORD); -- if (str1_isL == str2_isL) { // LL or UU -- add(t0, str1, cnt2); -- ld(tmp1, Address(t0)); -- add(t0, str2, cnt2); -- ld(tmp2, Address(t0)); -- addi(cnt2, cnt2, 8); -- } else if (isLU) { // LU case -- add(t0, str1, cnt1); -- lwu(tmp1, Address(t0)); -- add(t0, str2, cnt2); -- ld(tmp2, Address(t0)); -- addi(cnt1, cnt1, 4); -- inflate_lo32(tmp3, tmp1); -- mv(tmp1, tmp3); -- addi(cnt2, cnt2, 8); -- } else { // UL case -- add(t0, str2, cnt2); -- lwu(tmp2, Address(t0)); -- add(t0, str1, cnt1); -- ld(tmp1, Address(t0)); -- inflate_lo32(tmp3, tmp2); -- mv(tmp2, tmp3); -- addi(cnt1, cnt1, 8); -- addi(cnt2, cnt2, 4); -- } -- bgez(cnt2, TAIL); -- -- xorr(tmp3, tmp1, tmp2); -- beqz(tmp3, NEXT_WORD); -- j(DIFFERENCE); -- bind(TAIL); -- xorr(tmp3, tmp1, tmp2); -- bnez(tmp3, DIFFERENCE); -- // Last longword. In the case where length == 4 we compare the -- // same longword twice, but that's still faster than another -- // conditional branch. -- if (str1_isL == str2_isL) { // LL or UU -- ld(tmp1, Address(str1)); -- ld(tmp2, Address(str2)); -- } else if (isLU) { // LU case -- lwu(tmp1, Address(str1)); -- ld(tmp2, Address(str2)); -- inflate_lo32(tmp3, tmp1); -- mv(tmp1, tmp3); -- } else { // UL case -- lwu(tmp2, Address(str2)); -- ld(tmp1, Address(str1)); -- inflate_lo32(tmp3, tmp2); -- mv(tmp2, tmp3); -- } -- bind(TAIL_CHECK); -- xorr(tmp3, tmp1, tmp2); -- beqz(tmp3, DONE); -- -- // Find the first different characters in the longwords and -- // compute their difference. 
-- bind(DIFFERENCE); -- ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb -- srl(tmp1, tmp1, result); -- srl(tmp2, tmp2, result); -- if (isLL) { -- andi(tmp1, tmp1, 0xFF); -- andi(tmp2, tmp2, 0xFF); -- } else { -- andi(tmp1, tmp1, 0xFFFF); -- andi(tmp2, tmp2, 0xFFFF); -- } -- sub(result, tmp1, tmp2); -- j(DONE); -- } -- -- bind(STUB); -- RuntimeAddress stub = NULL; -- switch (ae) { -- case StrIntrinsicNode::LL: -- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); -- break; -- case StrIntrinsicNode::UU: -- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); -- break; -- case StrIntrinsicNode::LU: -- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); -- break; -- case StrIntrinsicNode::UL: -- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); -- break; -- default: -- ShouldNotReachHere(); -- } -- assert(stub.target() != NULL, "compare_long_string stub has not been generated"); -- trampoline_call(stub); -- j(DONE); -- -- bind(SHORT_STRING); -- // Is the minimum length zero? -- beqz(cnt2, DONE); -- // arrange code to do most branches while loading and loading next characters -- // while comparing previous -- (this->*str1_load_chr)(tmp1, Address(str1), t0); -- addi(str1, str1, str1_chr_size); -- addi(cnt2, cnt2, -1); -- beqz(cnt2, SHORT_LAST_INIT); -- (this->*str2_load_chr)(cnt1, Address(str2), t0); -- addi(str2, str2, str2_chr_size); -- j(SHORT_LOOP_START); -- bind(SHORT_LOOP); -- addi(cnt2, cnt2, -1); -- beqz(cnt2, SHORT_LAST); -- bind(SHORT_LOOP_START); -- (this->*str1_load_chr)(tmp2, Address(str1), t0); -- addi(str1, str1, str1_chr_size); -- (this->*str2_load_chr)(t0, Address(str2), t0); -- addi(str2, str2, str2_chr_size); -- bne(tmp1, cnt1, SHORT_LOOP_TAIL); -- addi(cnt2, cnt2, -1); -- beqz(cnt2, SHORT_LAST2); -- (this->*str1_load_chr)(tmp1, Address(str1), t0); -- addi(str1, str1, str1_chr_size); -- (this->*str2_load_chr)(cnt1, Address(str2), t0); -- addi(str2, str2, str2_chr_size); -- beq(tmp2, t0, SHORT_LOOP); -- sub(result, tmp2, t0); -- j(DONE); -- bind(SHORT_LOOP_TAIL); -- sub(result, tmp1, cnt1); -- j(DONE); -- bind(SHORT_LAST2); -- beq(tmp2, t0, DONE); -- sub(result, tmp2, t0); -- -- j(DONE); -- bind(SHORT_LAST_INIT); -- (this->*str2_load_chr)(cnt1, Address(str2), t0); -- addi(str2, str2, str2_chr_size); -- bind(SHORT_LAST); -- beq(tmp1, cnt1, DONE); -- sub(result, tmp1, cnt1); -- -- bind(DONE); -- -- BLOCK_COMMENT("} string_compare"); --} -- --void C2_MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, -- Register tmp4, Register tmp5, Register tmp6, Register result, -- Register cnt1, int elem_size) { -- Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; -- Register tmp1 = t0; -- Register tmp2 = t1; -- Register cnt2 = tmp2; // cnt2 only used in array length compare -- Register elem_per_word = tmp6; -- int log_elem_size = exact_log2(elem_size); -- int length_offset = arrayOopDesc::length_offset_in_bytes(); -- int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); -- -- assert(elem_size == 1 || elem_size == 2, "must be char or byte"); -- assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); -- li(elem_per_word, wordSize / elem_size); -- -- BLOCK_COMMENT("arrays_equals {"); -- -- // if (a1 == a2), return true -- beq(a1, a2, SAME); -- -- mv(result, false); -- beqz(a1, DONE); -- beqz(a2, DONE); -- lwu(cnt1, Address(a1, length_offset)); -- lwu(cnt2, Address(a2, length_offset)); -- bne(cnt2, cnt1, DONE); -- beqz(cnt1, SAME); -- -- slli(tmp5, cnt1, 3 + log_elem_size); -- sub(tmp5, zr, tmp5); -- add(a1, a1, base_offset); -- add(a2, a2, base_offset); -- ld(tmp3, Address(a1, 0)); -- ld(tmp4, Address(a2, 0)); -- ble(cnt1, elem_per_word, SHORT); // short or same -- -- // Main 16 byte comparison loop with 2 exits -- bind(NEXT_DWORD); { -- ld(tmp1, Address(a1, wordSize)); -- ld(tmp2, Address(a2, wordSize)); -- sub(cnt1, cnt1, 2 * wordSize / elem_size); -- blez(cnt1, TAIL); -- bne(tmp3, tmp4, DONE); -- ld(tmp3, Address(a1, 2 * wordSize)); -- ld(tmp4, Address(a2, 2 * wordSize)); -- add(a1, a1, 2 * wordSize); -- add(a2, a2, 2 * wordSize); -- ble(cnt1, elem_per_word, TAIL2); -- } beq(tmp1, tmp2, NEXT_DWORD); -- j(DONE); -- -- bind(TAIL); -- xorr(tmp4, tmp3, tmp4); -- xorr(tmp2, tmp1, tmp2); -- sll(tmp2, tmp2, tmp5); -- orr(tmp5, tmp4, tmp2); -- j(IS_TMP5_ZR); -- -- bind(TAIL2); -- bne(tmp1, tmp2, DONE); -- -- bind(SHORT); -- xorr(tmp4, tmp3, tmp4); -- sll(tmp5, tmp4, tmp5); -- -- bind(IS_TMP5_ZR); -- bnez(tmp5, DONE); -- -- bind(SAME); -- mv(result, true); -- // That's it. -- bind(DONE); -- -- BLOCK_COMMENT("} array_equals"); --} -- --// Compare Strings -- --// For Strings we're passed the address of the first characters in a1 --// and a2 and the length in cnt1. --// elem_size is the element size in bytes: either 1 or 2. --// There are two implementations. For arrays >= 8 bytes, all --// comparisons (including the final one, which may overlap) are --// performed 8 bytes at a time. For strings < 8 bytes, we compare a --// halfword, then a short, and then a byte. -- --void C2_MacroAssembler::string_equals(Register a1, Register a2, -- Register result, Register cnt1, int elem_size) --{ -- Label SAME, DONE, SHORT, NEXT_WORD; -- Register tmp1 = t0; -- Register tmp2 = t1; -- -- assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); -- assert_different_registers(a1, a2, result, cnt1, t0, t1); -- -- BLOCK_COMMENT("string_equals {"); -- -- mv(result, false); -- -- // Check for short strings, i.e. smaller than wordSize. -- sub(cnt1, cnt1, wordSize); -- bltz(cnt1, SHORT); -- -- // Main 8 byte comparison loop. -- bind(NEXT_WORD); { -- ld(tmp1, Address(a1, 0)); -- add(a1, a1, wordSize); -- ld(tmp2, Address(a2, 0)); -- add(a2, a2, wordSize); -- sub(cnt1, cnt1, wordSize); -- bne(tmp1, tmp2, DONE); -- } bgtz(cnt1, NEXT_WORD); -- -- // Last longword. In the case where length == 4 we compare the -- // same longword twice, but that's still faster than another -- // conditional branch. -- // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when -- // length == 4. -- add(tmp1, a1, cnt1); -- ld(tmp1, Address(tmp1, 0)); -- add(tmp2, a2, cnt1); -- ld(tmp2, Address(tmp2, 0)); -- bne(tmp1, tmp2, DONE); -- j(SAME); -- -- bind(SHORT); -- Label TAIL03, TAIL01; -- -- // 0-7 bytes left. -- andi(t0, cnt1, 4); -- beqz(t0, TAIL03); -- { -- lwu(tmp1, Address(a1, 0)); -- add(a1, a1, 4); -- lwu(tmp2, Address(a2, 0)); -- add(a2, a2, 4); -- bne(tmp1, tmp2, DONE); -- } -- -- bind(TAIL03); -- // 0-3 bytes left. 
-- andi(t0, cnt1, 2); -- beqz(t0, TAIL01); -- { -- lhu(tmp1, Address(a1, 0)); -- add(a1, a1, 2); -- lhu(tmp2, Address(a2, 0)); -- add(a2, a2, 2); -- bne(tmp1, tmp2, DONE); -- } -- -- bind(TAIL01); -- if (elem_size == 1) { // Only needed when comparing 1-byte elements -- // 0-1 bytes left. -- andi(t0, cnt1, 1); -- beqz(t0, SAME); -- { -- lbu(tmp1, a1, 0); -- lbu(tmp2, a2, 0); -- bne(tmp1, tmp2, DONE); -- } -- } -- -- // Arrays are equal. -- bind(SAME); -- mv(result, true); -- -- // That's it. -- bind(DONE); -- BLOCK_COMMENT("} string_equals"); --} -- --typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); --typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, -- bool is_far, bool is_unordered); -- --static conditional_branch_insn conditional_branches[] = --{ -- /* SHORT branches */ -- (conditional_branch_insn)&Assembler::beq, -- (conditional_branch_insn)&Assembler::bgt, -- NULL, // BoolTest::overflow -- (conditional_branch_insn)&Assembler::blt, -- (conditional_branch_insn)&Assembler::bne, -- (conditional_branch_insn)&Assembler::ble, -- NULL, // BoolTest::no_overflow -- (conditional_branch_insn)&Assembler::bge, -- -- /* UNSIGNED branches */ -- (conditional_branch_insn)&Assembler::beq, -- (conditional_branch_insn)&Assembler::bgtu, -- NULL, -- (conditional_branch_insn)&Assembler::bltu, -- (conditional_branch_insn)&Assembler::bne, -- (conditional_branch_insn)&Assembler::bleu, -- NULL, -- (conditional_branch_insn)&Assembler::bgeu --}; -- --static float_conditional_branch_insn float_conditional_branches[] = --{ -- /* FLOAT SHORT branches */ -- (float_conditional_branch_insn)&MacroAssembler::float_beq, -- (float_conditional_branch_insn)&MacroAssembler::float_bgt, -- NULL, // BoolTest::overflow -- (float_conditional_branch_insn)&MacroAssembler::float_blt, -- (float_conditional_branch_insn)&MacroAssembler::float_bne, -- (float_conditional_branch_insn)&MacroAssembler::float_ble, -- NULL, // BoolTest::no_overflow -- (float_conditional_branch_insn)&MacroAssembler::float_bge, -- -- /* DOUBLE SHORT branches */ -- (float_conditional_branch_insn)&MacroAssembler::double_beq, -- (float_conditional_branch_insn)&MacroAssembler::double_bgt, -- NULL, -- (float_conditional_branch_insn)&MacroAssembler::double_blt, -- (float_conditional_branch_insn)&MacroAssembler::double_bne, -- (float_conditional_branch_insn)&MacroAssembler::double_ble, -- NULL, -- (float_conditional_branch_insn)&MacroAssembler::double_bge --}; -- --void C2_MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { -- assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), -- "invalid conditional branch index"); -- (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); --} -- --// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use --// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). 
--void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { -- assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), -- "invalid float conditional branch index"); -- int booltest_flag = cmpFlag & ~(C2_MacroAssembler::double_branch_mask); -- (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, -- (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); --} -- --void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -- switch (cmpFlag) { -- case BoolTest::eq: -- case BoolTest::le: -- beqz(op1, L, is_far); -- break; -- case BoolTest::ne: -- case BoolTest::gt: -- bnez(op1, L, is_far); -- break; -- default: -- ShouldNotReachHere(); -- } --} -- --void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -- switch (cmpFlag) { -- case BoolTest::eq: -- beqz(op1, L, is_far); -- break; -- case BoolTest::ne: -- bnez(op1, L, is_far); -- break; -- default: -- ShouldNotReachHere(); -- } --} -- --void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { -- Label L; -- cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); -- mv(dst, src); -- bind(L); --} -- --// Set dst to NaN if any NaN input. --void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, -- bool is_double, bool is_min) { -- assert_different_registers(dst, src1, src2); -- -- Label Done; -- fsflags(zr); -- if (is_double) { -- is_min ? fmin_d(dst, src1, src2) -- : fmax_d(dst, src1, src2); -- // Checking NaNs -- flt_d(zr, src1, src2); -- } else { -- is_min ? fmin_s(dst, src1, src2) -- : fmax_s(dst, src1, src2); -- // Checking NaNs -- flt_s(zr, src1, src2); -- } -- -- frflags(t0); -- beqz(t0, Done); -- -- // In case of NaNs -- is_double ? fadd_d(dst, src1, src2) -- : fadd_s(dst, src1, src2); -- -- bind(Done); --} -diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp -deleted file mode 100644 -index 90b6554af02..00000000000 ---- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp -+++ /dev/null -@@ -1,141 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#ifndef CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP --#define CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP -- --// C2_MacroAssembler contains high-level macros for C2 -- -- public: -- -- void string_compare(Register str1, Register str2, -- Register cnt1, Register cnt2, Register result, -- Register tmp1, Register tmp2, Register tmp3, -- int ae); -- -- void string_indexof_char_short(Register str1, Register cnt1, -- Register ch, Register result, -- bool isL); -- -- void string_indexof_char(Register str1, Register cnt1, -- Register ch, Register result, -- Register tmp1, Register tmp2, -- Register tmp3, Register tmp4, -- bool isL); -- -- void string_indexof(Register str1, Register str2, -- Register cnt1, Register cnt2, -- Register tmp1, Register tmp2, -- Register tmp3, Register tmp4, -- Register tmp5, Register tmp6, -- Register result, int ae); -- -- void string_indexof_linearscan(Register haystack, Register needle, -- Register haystack_len, Register needle_len, -- Register tmp1, Register tmp2, -- Register tmp3, Register tmp4, -- int needle_con_cnt, Register result, int ae); -- -- void arrays_equals(Register r1, Register r2, -- Register tmp3, Register tmp4, -- Register tmp5, Register tmp6, -- Register result, Register cnt1, -- int elem_size); -- -- void string_equals(Register r1, Register r2, -- Register result, Register cnt1, -- int elem_size); -- -- // refer to conditional_branches and float_conditional_branches -- static const int bool_test_bits = 3; -- static const int neg_cond_bits = 2; -- static const int unsigned_branch_mask = 1 << bool_test_bits; -- static const int double_branch_mask = 1 << bool_test_bits; -- -- // cmp -- void cmp_branch(int cmpFlag, -- Register op1, Register op2, -- Label& label, bool is_far = false); -- -- void float_cmp_branch(int cmpFlag, -- FloatRegister op1, FloatRegister op2, -- Label& label, bool is_far = false); -- -- void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, -- Label& L, bool is_far = false); -- -- void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, -- Label& L, bool is_far = false); -- -- void enc_cmove(int cmpFlag, -- Register op1, Register op2, -- Register dst, Register src); -- -- void spill(Register r, bool is64, int offset) { -- is64 ? sd(r, Address(sp, offset)) -- : sw(r, Address(sp, offset)); -- } -- -- void spill(FloatRegister f, bool is64, int offset) { -- is64 ? fsd(f, Address(sp, offset)) -- : fsw(f, Address(sp, offset)); -- } -- -- void spill(VectorRegister v, int offset) { -- add(t0, sp, offset); -- vs1r_v(v, t0); -- } -- -- void unspill(Register r, bool is64, int offset) { -- is64 ? ld(r, Address(sp, offset)) -- : lw(r, Address(sp, offset)); -- } -- -- void unspillu(Register r, bool is64, int offset) { -- is64 ? ld(r, Address(sp, offset)) -- : lwu(r, Address(sp, offset)); -- } -- -- void unspill(FloatRegister f, bool is64, int offset) { -- is64 ? 
fld(f, Address(sp, offset)) -- : flw(f, Address(sp, offset)); -- } -- -- void unspill(VectorRegister v, int offset) { -- add(t0, sp, offset); -- vl1r_v(v, t0); -- } -- -- void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vec_reg_size_in_bytes) { -- assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); -- unspill(v0, src_offset); -- spill(v0, dst_offset); -- } -- -- void minmax_FD(FloatRegister dst, -- FloatRegister src1, FloatRegister src2, -- bool is_double, bool is_min); -- --#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 304b6f2b06c..d175a62aeeb 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -4125,3 +4125,1285 @@ void MacroAssembler::safepoint_ifence() { - ifence(); - } - -+#ifdef COMPILER2 -+// short string -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL) -+{ -+ Register ch1 = t0; -+ Register index = t1; -+ -+ BLOCK_COMMENT("string_indexof_char_short {"); -+ -+ Label LOOP, LOOP1, LOOP4, LOOP8; -+ Label MATCH, MATCH1, MATCH2, MATCH3, -+ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; -+ -+ mv(result, -1); -+ mv(index, zr); -+ -+ bind(LOOP); -+ addi(t0, index, 8); -+ ble(t0, cnt1, LOOP8); -+ addi(t0, index, 4); -+ ble(t0, cnt1, LOOP4); -+ j(LOOP1); -+ -+ bind(LOOP8); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); -+ beq(ch, ch1, MATCH4); -+ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); -+ beq(ch, ch1, MATCH5); -+ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); -+ beq(ch, ch1, MATCH6); -+ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); -+ beq(ch, ch1, MATCH7); -+ addi(index, index, 8); -+ addi(str1, str1, isL ? 8 : 16); -+ blt(index, cnt1, LOOP); -+ j(NOMATCH); -+ -+ bind(LOOP4); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ addi(index, index, 4); -+ addi(str1, str1, isL ? 4 : 8); -+ bge(index, cnt1, NOMATCH); -+ -+ bind(LOOP1); -+ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); -+ beq(ch, ch1, MATCH); -+ addi(index, index, 1); -+ addi(str1, str1, isL ? 
1 : 2); -+ blt(index, cnt1, LOOP1); -+ j(NOMATCH); -+ -+ bind(MATCH1); -+ addi(index, index, 1); -+ j(MATCH); -+ -+ bind(MATCH2); -+ addi(index, index, 2); -+ j(MATCH); -+ -+ bind(MATCH3); -+ addi(index, index, 3); -+ j(MATCH); -+ -+ bind(MATCH4); -+ addi(index, index, 4); -+ j(MATCH); -+ -+ bind(MATCH5); -+ addi(index, index, 5); -+ j(MATCH); -+ -+ bind(MATCH6); -+ addi(index, index, 6); -+ j(MATCH); -+ -+ bind(MATCH7); -+ addi(index, index, 7); -+ -+ bind(MATCH); -+ mv(result, index); -+ bind(NOMATCH); -+ BLOCK_COMMENT("} string_indexof_char_short"); -+} -+ -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void MacroAssembler::string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL) -+{ -+ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; -+ Register ch1 = t0; -+ Register orig_cnt = t1; -+ Register mask1 = tmp3; -+ Register mask2 = tmp2; -+ Register match_mask = tmp1; -+ Register trailing_char = tmp4; -+ Register unaligned_elems = tmp4; -+ -+ BLOCK_COMMENT("string_indexof_char {"); -+ beqz(cnt1, NOMATCH); -+ -+ addi(t0, cnt1, isL ? -32 : -16); -+ bgtz(t0, DO_LONG); -+ string_indexof_char_short(str1, cnt1, ch, result, isL); -+ j(DONE); -+ -+ bind(DO_LONG); -+ mv(orig_cnt, cnt1); -+ if (AvoidUnalignedAccesses) { -+ Label ALIGNED; -+ andi(unaligned_elems, str1, 0x7); -+ beqz(unaligned_elems, ALIGNED); -+ sub(unaligned_elems, unaligned_elems, 8); -+ neg(unaligned_elems, unaligned_elems); -+ if (!isL) { -+ srli(unaligned_elems, unaligned_elems, 1); -+ } -+ // do unaligned part per element -+ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); -+ bgez(result, DONE); -+ mv(orig_cnt, cnt1); -+ sub(cnt1, cnt1, unaligned_elems); -+ bind(ALIGNED); -+ } -+ -+ // duplicate ch -+ if (isL) { -+ slli(ch1, ch, 8); -+ orr(ch, ch1, ch); -+ } -+ slli(ch1, ch, 16); -+ orr(ch, ch1, ch); -+ slli(ch1, ch, 32); -+ orr(ch, ch1, ch); -+ -+ if (!isL) { -+ slli(cnt1, cnt1, 1); -+ } -+ -+ uint64_t mask0101 = UCONST64(0x0101010101010101); -+ uint64_t mask0001 = UCONST64(0x0001000100010001); -+ mv(mask1, isL ? mask0101 : mask0001); -+ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); -+ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); -+ mv(mask2, isL ? 
mask7f7f : mask7fff); -+ -+ bind(CH1_LOOP); -+ ld(ch1, Address(str1)); -+ addi(str1, str1, 8); -+ addi(cnt1, cnt1, -8); -+ compute_match_mask(ch1, ch, match_mask, mask1, mask2); -+ bnez(match_mask, HIT); -+ bgtz(cnt1, CH1_LOOP); -+ j(NOMATCH); -+ -+ bind(HIT); -+ ctzc_bit(trailing_char, match_mask, isL, ch1, result); -+ srli(trailing_char, trailing_char, 3); -+ addi(cnt1, cnt1, 8); -+ ble(cnt1, trailing_char, NOMATCH); -+ // match case -+ if (!isL) { -+ srli(cnt1, cnt1, 1); -+ srli(trailing_char, trailing_char, 1); -+ } -+ -+ sub(result, orig_cnt, cnt1); -+ add(result, result, trailing_char); -+ j(DONE); -+ -+ bind(NOMATCH); -+ mv(result, -1); -+ -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof_char"); -+} -+ -+typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); -+ -+// Search for needle in haystack and return index or -1 -+// x10: result -+// x11: haystack -+// x12: haystack_len -+// x13: needle -+// x14: needle_len -+void MacroAssembler::string_indexof(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae) -+{ -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); -+ -+ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; -+ -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register nlen_tmp = tmp1; // needle len tmp -+ Register hlen_tmp = tmp2; // haystack len tmp -+ Register result_tmp = tmp4; -+ -+ bool isLL = ae == StrIntrinsicNode::LL; -+ -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ -+ BLOCK_COMMENT("string_indexof {"); -+ -+ // Note, inline_string_indexOf() generates checks: -+ // if (pattern.count > src.count) return -1; -+ // if (pattern.count == 0) return 0; -+ -+ // We have two strings, a source string in haystack, haystack_len and a pattern string -+ // in needle, needle_len. Find the first occurence of pattern in source or return -1. -+ -+ // For larger pattern and source we use a simplified Boyer Moore algorithm. -+ // With a small pattern and source we use linear scan. -+ -+ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. -+ sub(result_tmp, haystack_len, needle_len); -+ // needle_len < 8, use linear scan -+ sub(t0, needle_len, 8); -+ bltz(t0, LINEARSEARCH); -+ // needle_len >= 256, use linear scan -+ sub(t0, needle_len, 256); -+ bgez(t0, LINEARSTUB); -+ // needle_len >= haystack_len/4, use linear scan -+ srli(t0, haystack_len, 2); -+ bge(needle_len, t0, LINEARSTUB); -+ -+ // Boyer-Moore-Horspool introduction: -+ // The Boyer Moore alogorithm is based on the description here:- -+ // -+ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm -+ // -+ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule -+ // and the 'Good Suffix' rule. 
-+ // -+ // These rules are essentially heuristics for how far we can shift the -+ // pattern along the search string. -+ // -+ // The implementation here uses the 'Bad Character' rule only because of the -+ // complexity of initialisation for the 'Good Suffix' rule. -+ // -+ // This is also known as the Boyer-Moore-Horspool algorithm: -+ // -+ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm -+ // -+ // #define ASIZE 256 -+ // -+ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { -+ // int i, j; -+ // unsigned c; -+ // unsigned char bc[ASIZE]; -+ // -+ // /* Preprocessing */ -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ // -+ // /* Searching */ -+ // j = 0; -+ // while (j <= n - m) { -+ // c = src[i+j]; -+ // if (pattern[m-1] == c) -+ // int k; -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // if (k < 0) return j; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 -+ // // LL case: (c< 256) always true. Remove branch -+ // j += bc[pattern[j+m-1]]; -+ // #endif -+ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF -+ // // UU case: need if (c < ASIZE) check. Skip by m if not. -+ // if (c < ASIZE) -+ // j += bc[pattern[j+m-1]]; -+ // else -+ // j += m -+ // #endif -+ // } -+ // return -1; -+ // } -+ -+ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result -+ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, -+ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; -+ -+ Register haystack_end = haystack_len; -+ Register skipch = tmp2; -+ -+ // pattern length is >=8, so, we can read at least 1 register for cases when -+ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for -+ // UL case. We'll re-read last character in inner pre-loop code to have -+ // single outer pre-loop load -+ const int firstStep = isLL ? 
7 : 3; -+ -+ const int ASIZE = 256; -+ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) -+ -+ sub(sp, sp, ASIZE); -+ -+ // init BC offset table with default value: needle_len -+ slli(t0, needle_len, 8); -+ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] -+ slli(tmp1, t0, 16); -+ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] -+ slli(tmp1, t0, 32); -+ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] -+ -+ mv(ch1, sp); // ch1 is t0 -+ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations -+ -+ bind(BM_INIT_LOOP); -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ for (int i = 0; i < 4; i++) { -+ sd(tmp5, Address(ch1, i * wordSize)); -+ } -+ add(ch1, ch1, 32); -+ sub(tmp6, tmp6, 4); -+ bgtz(tmp6, BM_INIT_LOOP); -+ -+ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern -+ Register orig_haystack = tmp5; -+ mv(orig_haystack, haystack); -+ // result_tmp = tmp4 -+ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); -+ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 -+ mv(tmp3, needle); -+ -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ bind(BCLOOP); -+ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); -+ add(tmp3, tmp3, needle_chr_size); -+ if (!needle_isL) { -+ // ae == StrIntrinsicNode::UU -+ mv(tmp6, ASIZE); -+ bgeu(ch1, tmp6, BCSKIP); -+ } -+ add(tmp4, sp, ch1); -+ sb(ch2, Address(tmp4)); // store skip offset to BC offset table -+ -+ bind(BCSKIP); -+ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 -+ bgtz(ch2, BCLOOP); -+ -+ // tmp6: pattern end, address after needle -+ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); -+ if (needle_isL == haystack_isL) { -+ // load last 8 bytes (8LL/4UU symbols) -+ ld(tmp6, Address(tmp6, -wordSize)); -+ } else { -+ // UL: from UTF-16(source) search Latin1(pattern) -+ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) -+ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d -+ // We'll have to wait until load completed, but it's still faster than per-character loads+checks -+ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a -+ slli(ch2, tmp6, XLEN - 24); -+ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b -+ slli(ch1, tmp6, XLEN - 16); -+ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c -+ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d -+ slli(ch2, ch2, 16); -+ orr(ch2, ch2, ch1); // 0x00000b0c -+ slli(result, tmp3, 48); // use result as temp register -+ orr(tmp6, tmp6, result); // 0x0a00000d -+ slli(result, ch2, 16); -+ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d -+ } -+ -+ // i = m - 1; -+ // skipch = j + i; -+ // if (skipch == pattern[m - 1] -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // else -+ // move j with bad char offset table -+ bind(BMLOOPSTR2); -+ // compare pattern to source string backward -+ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); -+ (this->*haystack_load_1chr)(skipch, Address(result), noreg); -+ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 -+ if (needle_isL == haystack_isL) { -+ // re-init tmp3. It's for free because it's executed in parallel with -+ // load above. 
Alternative is to initialize it before loop, but it'll -+ // affect performance on in-order systems with 2 or more ld/st pipelines -+ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] -+ } -+ if (!isLL) { // UU/UL case -+ slli(ch2, nlen_tmp, 1); // offsets in bytes -+ } -+ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char -+ add(result, haystack, isLL ? nlen_tmp : ch2); -+ ld(ch2, Address(result)); // load 8 bytes from source string -+ mv(ch1, tmp6); -+ if (isLL) { -+ j(BMLOOPSTR1_AFTER_LOAD); -+ } else { -+ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 -+ j(BMLOOPSTR1_CMP); -+ } -+ -+ bind(BMLOOPSTR1); -+ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ -+ bind(BMLOOPSTR1_AFTER_LOAD); -+ sub(nlen_tmp, nlen_tmp, 1); -+ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); -+ -+ bind(BMLOOPSTR1_CMP); -+ beq(ch1, ch2, BMLOOPSTR1); -+ -+ bind(BMSKIP); -+ if (!isLL) { -+ // if we've met UTF symbol while searching Latin1 pattern, then we can -+ // skip needle_len symbols -+ if (needle_isL != haystack_isL) { -+ mv(result_tmp, needle_len); -+ } else { -+ mv(result_tmp, 1); -+ } -+ mv(t0, ASIZE); -+ bgeu(skipch, t0, BMADV); -+ } -+ add(result_tmp, sp, skipch); -+ lbu(result_tmp, Address(result_tmp)); // load skip offset -+ -+ bind(BMADV); -+ sub(nlen_tmp, needle_len, 1); -+ // move haystack after bad char skip offset -+ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); -+ ble(haystack, haystack_end, BMLOOPSTR2); -+ add(sp, sp, ASIZE); -+ j(NOMATCH); -+ -+ bind(BMLOOPSTR1_LASTCMP); -+ bne(ch1, ch2, BMSKIP); -+ -+ bind(BMMATCH); -+ sub(result, haystack, orig_haystack); -+ if (!haystack_isL) { -+ srli(result, result, 1); -+ } -+ add(sp, sp, ASIZE); -+ j(DONE); -+ -+ bind(LINEARSTUB); -+ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm -+ bltz(t0, LINEARSEARCH); -+ mv(result, zr); -+ RuntimeAddress stub = NULL; -+ if (isLL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); -+ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); -+ } else if (needle_isL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); -+ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); -+ } else { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); -+ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); -+ } -+ trampoline_call(stub); -+ j(DONE); -+ -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); -+ -+ bind(LINEARSEARCH); -+ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); -+ -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof"); -+} -+ -+// string_indexof -+// result: x10 -+// src: x11 -+// src_count: x12 -+// pattern: x13 -+// pattern_count: x14 or 1/2/3/4 -+void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae) -+{ -+ // Note: -+ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant -+ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 -+ assert(needle_con_cnt <= 4, "Invalid 
needle constant count"); -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); -+ -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register hlen_neg = haystack_len, nlen_neg = needle_len; -+ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; -+ -+ bool isLL = ae == StrIntrinsicNode::LL; -+ -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; -+ -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; -+ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; -+ -+ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; -+ -+ Register first = tmp3; -+ -+ if (needle_con_cnt == -1) { -+ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; -+ -+ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); -+ bltz(t0, DOSHORT); -+ -+ (this->*needle_load_1chr)(first, Address(needle), noreg); -+ slli(t0, needle_len, needle_chr_shift); -+ add(needle, needle, t0); -+ neg(nlen_neg, t0); -+ slli(t0, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, t0); -+ neg(hlen_neg, t0); -+ -+ bind(FIRST_LOOP); -+ add(t0, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); -+ beq(first, ch2, STR1_LOOP); -+ -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); -+ -+ bind(STR1_LOOP); -+ add(nlen_tmp, nlen_neg, needle_chr_size); -+ add(hlen_tmp, hlen_neg, haystack_chr_size); -+ bgez(nlen_tmp, MATCH); -+ -+ bind(STR1_NEXT); -+ add(ch1, needle, nlen_tmp); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ add(nlen_tmp, nlen_tmp, needle_chr_size); -+ add(hlen_tmp, hlen_tmp, haystack_chr_size); -+ bltz(nlen_tmp, STR1_NEXT); -+ j(MATCH); -+ -+ bind(DOSHORT); -+ if (needle_isL == haystack_isL) { -+ sub(t0, needle_len, 2); -+ bltz(t0, DO1); -+ bgtz(t0, DO3); -+ } -+ } -+ -+ if (needle_con_cnt == 4) { -+ Label CH1_LOOP; -+ (this->*load_4chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, haystack_len, 4); -+ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); -+ -+ bind(CH1_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_4chr)(ch2, Address(ch2), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ } -+ -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { -+ Label CH1_LOOP; -+ BLOCK_COMMENT("string_indexof DO2 {"); -+ bind(DO2); -+ (this->*load_2chr)(ch1, Address(needle), noreg); -+ if (needle_con_cnt == 2) { -+ sub(result_tmp, haystack_len, 2); -+ } -+ slli(tmp3, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); -+ -+ bind(CH1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, 
MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ BLOCK_COMMENT("} string_indexof DO2"); -+ } -+ -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { -+ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; -+ BLOCK_COMMENT("string_indexof DO3 {"); -+ -+ bind(DO3); -+ (this->*load_2chr)(first, Address(needle), noreg); -+ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); -+ if (needle_con_cnt == 3) { -+ sub(result_tmp, haystack_len, 3); -+ } -+ slli(hlen_tmp, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, hlen_tmp); -+ neg(hlen_neg, hlen_tmp); -+ -+ bind(FIRST_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(ch2), noreg); -+ beq(first, ch2, STR1_LOOP); -+ -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); -+ -+ bind(STR1_LOOP); -+ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ j(MATCH); -+ BLOCK_COMMENT("} string_indexof DO3"); -+ } -+ -+ if (needle_con_cnt == -1 || needle_con_cnt == 1) { -+ Label DO1_LOOP; -+ -+ BLOCK_COMMENT("string_indexof DO1 {"); -+ bind(DO1); -+ (this->*needle_load_1chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, haystack_len, 1); -+ mv(tmp3, result_tmp); -+ if (haystack_chr_shift) { -+ slli(tmp3, result_tmp, haystack_chr_shift); -+ } -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); -+ -+ bind(DO1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, DO1_LOOP); -+ BLOCK_COMMENT("} string_indexof DO1"); -+ } -+ -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); -+ -+ bind(MATCH); -+ srai(t0, hlen_neg, haystack_chr_shift); -+ add(result, result_tmp, t0); -+ -+ bind(DONE); -+} -+ -+// Compare strings. -+void MacroAssembler::string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -+ Register tmp3, int ae) -+{ -+ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, -+ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, -+ SHORT_LOOP_START, TAIL_CHECK, L; -+ -+ const int STUB_THRESHOLD = 64 + 8; -+ bool isLL = ae == StrIntrinsicNode::LL; -+ bool isLU = ae == StrIntrinsicNode::LU; -+ bool isUL = ae == StrIntrinsicNode::UL; -+ -+ bool str1_isL = isLL || isLU; -+ bool str2_isL = isLL || isUL; -+ -+ // for L strings, 1 byte for 1 character -+ // for U strings, 2 bytes for 1 character -+ int str1_chr_size = str1_isL ? 1 : 2; -+ int str2_chr_size = str2_isL ? 1 : 2; -+ int minCharsInWord = isLL ? wordSize : wordSize / 2; -+ -+ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -+ -+ BLOCK_COMMENT("string_compare {"); -+ -+ // Bizzarely, the counts are passed in bytes, regardless of whether they -+ // are L or U strings, however the result is always in characters. -+ if (!str1_isL) { -+ sraiw(cnt1, cnt1, 1); -+ } -+ if (!str2_isL) { -+ sraiw(cnt2, cnt2, 1); -+ } -+ -+ // Compute the minimum of the string lengths and save the difference in result. 
-+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); -+ -+ // A very short string -+ li(t0, minCharsInWord); -+ ble(cnt2, t0, SHORT_STRING); -+ -+ // Compare longwords -+ // load first parts of strings and finish initialization while loading -+ { -+ if (str1_isL == str2_isL) { // LL or UU -+ // load 8 bytes once to compare -+ ld(tmp1, Address(str1)); -+ beq(str1, str2, DONE); -+ ld(tmp2, Address(str2)); -+ li(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ sub(cnt2, cnt2, minCharsInWord); -+ beqz(cnt2, TAIL_CHECK); -+ // convert cnt2 from characters to bytes -+ if (!str1_isL) { -+ slli(cnt2, cnt2, 1); -+ } -+ add(str2, str2, cnt2); -+ add(str1, str1, cnt2); -+ sub(cnt2, zr, cnt2); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ li(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ add(str1, str1, cnt2); -+ sub(cnt1, zr, cnt2); -+ slli(cnt2, cnt2, 1); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 4); -+ } else { // UL case -+ ld(tmp1, Address(str1)); -+ lwu(tmp2, Address(str2)); -+ li(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ slli(t0, cnt2, 1); -+ sub(cnt1, zr, t0); -+ add(str1, str1, t0); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 8); -+ } -+ addi(cnt2, cnt2, isUL ? 4 : 8); -+ bgez(cnt2, TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); -+ -+ // main loop -+ bind(NEXT_WORD); -+ if (str1_isL == str2_isL) { // LL or UU -+ add(t0, str1, cnt2); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt2, cnt2, 8); -+ } else if (isLU) { // LU case -+ add(t0, str1, cnt1); -+ lwu(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt1, cnt1, 4); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ addi(cnt2, cnt2, 8); -+ } else { // UL case -+ add(t0, str2, cnt2); -+ lwu(tmp2, Address(t0)); -+ add(t0, str1, cnt1); -+ ld(tmp1, Address(t0)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ addi(cnt1, cnt1, 8); -+ addi(cnt2, cnt2, 4); -+ } -+ bgez(cnt2, TAIL); -+ -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, NEXT_WORD); -+ j(DIFFERENCE); -+ bind(TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ if (str1_isL == str2_isL) { // LL or UU -+ ld(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ } else { // UL case -+ lwu(tmp2, Address(str2)); -+ ld(tmp1, Address(str1)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ } -+ bind(TAIL_CHECK); -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, DONE); -+ -+ // Find the first different characters in the longwords and -+ // compute their difference. 
-+ bind(DIFFERENCE); -+ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb -+ srl(tmp1, tmp1, result); -+ srl(tmp2, tmp2, result); -+ if (isLL) { -+ andi(tmp1, tmp1, 0xFF); -+ andi(tmp2, tmp2, 0xFF); -+ } else { -+ andi(tmp1, tmp1, 0xFFFF); -+ andi(tmp2, tmp2, 0xFFFF); -+ } -+ sub(result, tmp1, tmp2); -+ j(DONE); -+ } -+ -+ bind(STUB); -+ RuntimeAddress stub = NULL; -+ switch (ae) { -+ case StrIntrinsicNode::LL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); -+ break; -+ case StrIntrinsicNode::UU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); -+ break; -+ case StrIntrinsicNode::LU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); -+ break; -+ case StrIntrinsicNode::UL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); -+ trampoline_call(stub); -+ j(DONE); -+ -+ bind(SHORT_STRING); -+ // Is the minimum length zero? -+ beqz(cnt2, DONE); -+ // arrange code to do most branches while loading and loading next characters -+ // while comparing previous -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ j(SHORT_LOOP_START); -+ bind(SHORT_LOOP); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST); -+ bind(SHORT_LOOP_START); -+ (this->*str1_load_chr)(tmp2, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(t0, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bne(tmp1, cnt1, SHORT_LOOP_TAIL); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST2); -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ beq(tmp2, t0, SHORT_LOOP); -+ sub(result, tmp2, t0); -+ j(DONE); -+ bind(SHORT_LOOP_TAIL); -+ sub(result, tmp1, cnt1); -+ j(DONE); -+ bind(SHORT_LAST2); -+ beq(tmp2, t0, DONE); -+ sub(result, tmp2, t0); -+ -+ j(DONE); -+ bind(SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bind(SHORT_LAST); -+ beq(tmp1, cnt1, DONE); -+ sub(result, tmp1, cnt1); -+ -+ bind(DONE); -+ -+ BLOCK_COMMENT("} string_compare"); -+} -+ -+void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, -+ Register tmp4, Register tmp5, Register tmp6, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; // cnt2 only used in array length compare -+ Register elem_per_word = tmp6; -+ int log_elem_size = exact_log2(elem_size); -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); -+ -+ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); -+ li(elem_per_word, wordSize / elem_size); -+ -+ BLOCK_COMMENT("arrays_equals {"); -+ -+ // if (a1 == a2), return true -+ beq(a1, a2, SAME); -+ -+ mv(result, false); -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt2, cnt1, DONE); -+ beqz(cnt1, SAME); -+ -+ slli(tmp5, cnt1, 3 + log_elem_size); -+ sub(tmp5, zr, tmp5); -+ add(a1, a1, base_offset); -+ add(a2, a2, base_offset); -+ ld(tmp3, Address(a1, 0)); -+ ld(tmp4, Address(a2, 0)); -+ ble(cnt1, elem_per_word, SHORT); // short or same -+ -+ // Main 16 byte comparison loop with 2 exits -+ bind(NEXT_DWORD); { -+ ld(tmp1, Address(a1, wordSize)); -+ ld(tmp2, Address(a2, wordSize)); -+ sub(cnt1, cnt1, 2 * wordSize / elem_size); -+ blez(cnt1, TAIL); -+ bne(tmp3, tmp4, DONE); -+ ld(tmp3, Address(a1, 2 * wordSize)); -+ ld(tmp4, Address(a2, 2 * wordSize)); -+ add(a1, a1, 2 * wordSize); -+ add(a2, a2, 2 * wordSize); -+ ble(cnt1, elem_per_word, TAIL2); -+ } beq(tmp1, tmp2, NEXT_DWORD); -+ j(DONE); -+ -+ bind(TAIL); -+ xorr(tmp4, tmp3, tmp4); -+ xorr(tmp2, tmp1, tmp2); -+ sll(tmp2, tmp2, tmp5); -+ orr(tmp5, tmp4, tmp2); -+ j(IS_TMP5_ZR); -+ -+ bind(TAIL2); -+ bne(tmp1, tmp2, DONE); -+ -+ bind(SHORT); -+ xorr(tmp4, tmp3, tmp4); -+ sll(tmp5, tmp4, tmp5); -+ -+ bind(IS_TMP5_ZR); -+ bnez(tmp5, DONE); -+ -+ bind(SAME); -+ mv(result, true); -+ // That's it. -+ bind(DONE); -+ -+ BLOCK_COMMENT("} array_equals"); -+} -+ -+// Compare Strings -+ -+// For Strings we're passed the address of the first characters in a1 -+// and a2 and the length in cnt1. -+// elem_size is the element size in bytes: either 1 or 2. -+// There are two implementations. For arrays >= 8 bytes, all -+// comparisons (including the final one, which may overlap) are -+// performed 8 bytes at a time. For strings < 8 bytes, we compare a -+// halfword, then a short, and then a byte. -+ -+void MacroAssembler::string_equals(Register a1, Register a2, -+ Register result, Register cnt1, int elem_size) -+{ -+ Label SAME, DONE, SHORT, NEXT_WORD; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ -+ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1); -+ -+ BLOCK_COMMENT("string_equals {"); -+ -+ mv(result, false); -+ -+ // Check for short strings, i.e. smaller than wordSize. -+ sub(cnt1, cnt1, wordSize); -+ bltz(cnt1, SHORT); -+ -+ // Main 8 byte comparison loop. -+ bind(NEXT_WORD); { -+ ld(tmp1, Address(a1, 0)); -+ add(a1, a1, wordSize); -+ ld(tmp2, Address(a2, 0)); -+ add(a2, a2, wordSize); -+ sub(cnt1, cnt1, wordSize); -+ bne(tmp1, tmp2, DONE); -+ } bgtz(cnt1, NEXT_WORD); -+ -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when -+ // length == 4. -+ add(tmp1, a1, cnt1); -+ ld(tmp1, Address(tmp1, 0)); -+ add(tmp2, a2, cnt1); -+ ld(tmp2, Address(tmp2, 0)); -+ bne(tmp1, tmp2, DONE); -+ j(SAME); -+ -+ bind(SHORT); -+ Label TAIL03, TAIL01; -+ -+ // 0-7 bytes left. -+ andi(t0, cnt1, 4); -+ beqz(t0, TAIL03); -+ { -+ lwu(tmp1, Address(a1, 0)); -+ add(a1, a1, 4); -+ lwu(tmp2, Address(a2, 0)); -+ add(a2, a2, 4); -+ bne(tmp1, tmp2, DONE); -+ } -+ -+ bind(TAIL03); -+ // 0-3 bytes left. 
-+ andi(t0, cnt1, 2); -+ beqz(t0, TAIL01); -+ { -+ lhu(tmp1, Address(a1, 0)); -+ add(a1, a1, 2); -+ lhu(tmp2, Address(a2, 0)); -+ add(a2, a2, 2); -+ bne(tmp1, tmp2, DONE); -+ } -+ -+ bind(TAIL01); -+ if (elem_size == 1) { // Only needed when comparing 1-byte elements -+ // 0-1 bytes left. -+ andi(t0, cnt1, 1); -+ beqz(t0, SAME); -+ { -+ lbu(tmp1, a1, 0); -+ lbu(tmp2, a2, 0); -+ bne(tmp1, tmp2, DONE); -+ } -+ } -+ -+ // Arrays are equal. -+ bind(SAME); -+ mv(result, true); -+ -+ // That's it. -+ bind(DONE); -+ BLOCK_COMMENT("} string_equals"); -+} -+ -+typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); -+typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, -+ bool is_far, bool is_unordered); -+ -+static conditional_branch_insn conditional_branches[] = -+{ -+ /* SHORT branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgt, -+ NULL, // BoolTest::overflow -+ (conditional_branch_insn)&Assembler::blt, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::ble, -+ NULL, // BoolTest::no_overflow -+ (conditional_branch_insn)&Assembler::bge, -+ -+ /* UNSIGNED branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgtu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bltu, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::bleu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bgeu -+}; -+ -+static float_conditional_branch_insn float_conditional_branches[] = -+{ -+ /* FLOAT SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::float_beq, -+ (float_conditional_branch_insn)&MacroAssembler::float_bgt, -+ NULL, // BoolTest::overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_blt, -+ (float_conditional_branch_insn)&MacroAssembler::float_bne, -+ (float_conditional_branch_insn)&MacroAssembler::float_ble, -+ NULL, // BoolTest::no_overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_bge, -+ -+ /* DOUBLE SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::double_beq, -+ (float_conditional_branch_insn)&MacroAssembler::double_bgt, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_blt, -+ (float_conditional_branch_insn)&MacroAssembler::double_bne, -+ (float_conditional_branch_insn)&MacroAssembler::double_ble, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_bge -+}; -+ -+void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), -+ "invalid conditional branch index"); -+ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); -+} -+ -+// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use -+// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). 
-+void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), -+ "invalid float conditional branch index"); -+ int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); -+ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, -+ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); -+} -+ -+void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ case BoolTest::le: -+ beqz(op1, L, is_far); -+ break; -+ case BoolTest::ne: -+ case BoolTest::gt: -+ bnez(op1, L, is_far); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ beqz(op1, L, is_far); -+ break; -+ case BoolTest::ne: -+ bnez(op1, L, is_far); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { -+ Label L; -+ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); -+ mv(dst, src); -+ bind(L); -+} -+ -+// Set dst to NaN if any NaN input. -+void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, -+ bool is_double, bool is_min) { -+ assert_different_registers(dst, src1, src2); -+ -+ Label Done; -+ fsflags(zr); -+ if (is_double) { -+ is_min ? fmin_d(dst, src1, src2) -+ : fmax_d(dst, src1, src2); -+ // Checking NaNs -+ flt_d(zr, src1, src2); -+ } else { -+ is_min ? fmin_s(dst, src1, src2) -+ : fmax_s(dst, src1, src2); -+ // Checking NaNs -+ flt_s(zr, src1, src2); -+ } -+ -+ frflags(t0); -+ beqz(t0, Done); -+ -+ // In case of NaNs -+ is_double ? 
fadd_d(dst, src1, src2) -+ : fadd_s(dst, src1, src2); -+ -+ bind(Done); -+} -+ -+#endif // COMPILER2 -+ -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index c6b71bdbc3c..2ef28771e2e 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -851,6 +851,109 @@ class MacroAssembler: public Assembler { - void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); - void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); - -+public: -+ void string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, -+ Register tmp1, Register tmp2, Register tmp3, -+ int ae); -+ -+ void string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL); -+ -+ void string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL); -+ -+ void string_indexof(Register str1, Register str2, -+ Register cnt1, Register cnt2, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae); -+ -+ void string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae); -+ -+ void arrays_equals(Register r1, Register r2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, Register cnt1, -+ int elem_size); -+ -+ void string_equals(Register r1, Register r2, -+ Register result, Register cnt1, -+ int elem_size); -+ -+ // refer to conditional_branches and float_conditional_branches -+ static const int bool_test_bits = 3; -+ static const int neg_cond_bits = 2; -+ static const int unsigned_branch_mask = 1 << bool_test_bits; -+ static const int double_branch_mask = 1 << bool_test_bits; -+ -+ // cmp -+ void cmp_branch(int cmpFlag, -+ Register op1, Register op2, -+ Label& label, bool is_far = false); -+ -+ void float_cmp_branch(int cmpFlag, -+ FloatRegister op1, FloatRegister op2, -+ Label& label, bool is_far = false); -+ -+ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, -+ Label& L, bool is_far = false); -+ -+ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, -+ Label& L, bool is_far = false); -+ -+ void enc_cmove(int cmpFlag, -+ Register op1, Register op2, -+ Register dst, Register src); -+ -+ void spill(Register r, bool is64, int offset) { -+ is64 ? sd(r, Address(sp, offset)) -+ : sw(r, Address(sp, offset)); -+ } -+ -+ void spill(FloatRegister f, bool is64, int offset) { -+ is64 ? fsd(f, Address(sp, offset)) -+ : fsw(f, Address(sp, offset)); -+ } -+ -+ void spill(VectorRegister v, int offset) { -+ add(t0, sp, offset); -+ vs1r_v(v, t0); -+ } -+ -+ void unspill(Register r, bool is64, int offset) { -+ is64 ? ld(r, Address(sp, offset)) -+ : lw(r, Address(sp, offset)); -+ } -+ -+ void unspillu(Register r, bool is64, int offset) { -+ is64 ? ld(r, Address(sp, offset)) -+ : lwu(r, Address(sp, offset)); -+ } -+ -+ void unspill(FloatRegister f, bool is64, int offset) { -+ is64 ? 
fld(f, Address(sp, offset)) -+ : flw(f, Address(sp, offset)); -+ } -+ -+ void unspill(VectorRegister v, int offset) { -+ add(t0, sp, offset); -+ vl1r_v(v, t0); -+ } -+ -+ void minmax_FD(FloatRegister dst, -+ FloatRegister src1, FloatRegister src2, -+ bool is_double, bool is_min); -+ - }; - - #ifdef ASSERT -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 13546ab328b..2e7eed8fb52 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -997,7 +997,7 @@ void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - #endif - - void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); - __ ebreak(); - } -@@ -1015,7 +1015,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { - #endif - - void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. - for (int i = 0; i < _count; i++) { - __ nop(); -@@ -1074,7 +1074,7 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - assert_cond(ra_ != NULL); - Compile* C = ra_->C; -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - - // n.b. frame size includes space for return pc and fp - const int framesize = C->output()->frame_size_in_bytes(); -@@ -1150,7 +1150,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - assert_cond(ra_ != NULL); - Compile* C = ra_->C; -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - assert_cond(C != NULL); - int framesize = C->output()->frame_size_in_bytes(); - -@@ -1251,7 +1251,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo - int dst_offset = ra_->reg2offset(dst_lo); - - if (cbuf != NULL) { -- C2_MacroAssembler _masm(cbuf); -+ MacroAssembler _masm(cbuf); - Assembler::CompressibleRegion cr(&_masm); - switch (src_lo_rc) { - case rc_int: -@@ -1371,7 +1371,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - #endif - - void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - - assert_cond(ra_ != NULL); - int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); -@@ -1422,7 +1422,7 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const - void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const - { - // This is the unverified entry point. -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - - Label skip; - __ cmp_klass(j_rarg0, t1, t0, skip); -@@ -1449,7 +1449,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) - // j #exception_blob_entry_point - // Note that the code buffer's insts_mark is always relative to insts. - // That's why we must use the macroassembler to generate a handler. 
-- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - address base = __ start_a_stub(size_exception_handler()); - if (base == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); -@@ -1467,7 +1467,7 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) - { - // Note that the code buffer's insts_mark is always relative to insts. - // That's why we must use the macroassembler to generate a handler. -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - address base = __ start_a_stub(size_deopt_handler()); - if (base == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); -@@ -1848,7 +1848,7 @@ encode %{ - // BEGIN Non-volatile memory access - - enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); - int64_t con = (int64_t)$src$$constant; - Register dst_reg = as_Register($dst$$reg); -@@ -1856,7 +1856,7 @@ encode %{ - %} - - enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - address con = (address)$src$$constant; - if (con == NULL || con == (address)1) { -@@ -1875,7 +1875,7 @@ encode %{ - %} - - enc_class riscv_enc_mov_p1(iRegP dst) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); - Register dst_reg = as_Register($dst$$reg); - __ li(dst_reg, 1); -@@ -1893,12 +1893,12 @@ encode %{ - %} - - enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ load_byte_map_base($dst$$Register); - %} - - enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - address con = (address)$src$$constant; - if (con == NULL) { -@@ -1911,13 +1911,13 @@ encode %{ - %} - - enc_class riscv_enc_mov_zero(iRegNorP dst) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - __ mv(dst_reg, zr); - %} - - enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - address con = (address)$src$$constant; - if (con == NULL) { -@@ -1930,42 +1930,42 @@ encode %{ - %} - - enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, - /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - %} - - enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, - /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - %} - - enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, - /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - 
%} - - enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, - /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - %} - - enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, - /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - %} - - enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, - /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); -@@ -1974,13 +1974,13 @@ encode %{ - // compare and branch instruction encodings - - enc_class riscv_enc_j(label lbl) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Label* L = $lbl$$label; - __ j(*L); - %} - - enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Label* L = $lbl$$label; - switch ($cmp$$cmpcode) { - case(BoolTest::ge): -@@ -2004,7 +2004,7 @@ encode %{ - - Label miss; - Label done; -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, - NULL, &miss); - if ($primary) { -@@ -2023,7 +2023,7 @@ encode %{ - %} - - enc_class riscv_enc_java_static_call(method meth) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - - address addr = (address)$meth$$method; - address call = NULL; -@@ -2055,7 +2055,7 @@ encode %{ - %} - - enc_class riscv_enc_java_dynamic_call(method meth) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - int method_index = resolved_method_index(cbuf); - address call = __ ic_call((address)$meth$$method, method_index); - if (call == NULL) { -@@ -2065,7 +2065,7 @@ encode %{ - %} - - enc_class riscv_enc_call_epilog() %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - if (VerifyStackAtCalls) { - // Check that stack depth is unchanged: find majik cookie on stack - __ call_Unimplemented(); -@@ -2073,7 +2073,7 @@ encode %{ - %} - - enc_class riscv_enc_java_to_runtime(method meth) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - - // some calls to generated routines (arraycopy code) are scheduled - // by C2 as runtime calls. if so we can call them using a jr (they -@@ -2102,7 +2102,7 @@ encode %{ - - // using the cr register as the bool result: 0 for success; others failed. - enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register flag = t1; - Register oop = as_Register($object$$reg); - Register box = as_Register($box$$reg); -@@ -2189,7 +2189,7 @@ encode %{ - - // using cr flag to indicate the fast_unlock result: 0 for success; others failed. 
- enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register flag = t1; - Register oop = as_Register($object$$reg); - Register box = as_Register($box$$reg); -@@ -2262,7 +2262,7 @@ encode %{ - // arithmetic encodings - - enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - Register src1_reg = as_Register($src1$$reg); - Register src2_reg = as_Register($src2$$reg); -@@ -2270,7 +2270,7 @@ encode %{ - %} - - enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - Register src1_reg = as_Register($src1$$reg); - Register src2_reg = as_Register($src2$$reg); -@@ -2278,7 +2278,7 @@ encode %{ - %} - - enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - Register src1_reg = as_Register($src1$$reg); - Register src2_reg = as_Register($src2$$reg); -@@ -2286,7 +2286,7 @@ encode %{ - %} - - enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - Register src1_reg = as_Register($src1$$reg); - Register src2_reg = as_Register($src2$$reg); -@@ -2294,14 +2294,14 @@ encode %{ - %} - - enc_class riscv_enc_tail_call(iRegP jump_target) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); - Register target_reg = as_Register($jump_target$$reg); - __ jr(target_reg); - %} - - enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); - Register target_reg = as_Register($jump_target$$reg); - // exception oop should be in x10 -@@ -2312,12 +2312,12 @@ encode %{ - %} - - enc_class riscv_enc_rethrow() %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); - %} - - enc_class riscv_enc_ret() %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); - __ ret(); - %} -@@ -8506,7 +8506,7 @@ instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8526,7 +8526,7 @@ instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8585,7 +8585,7 @@ instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | 
MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8604,7 +8604,7 @@ instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8625,7 +8625,7 @@ instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8645,7 +8645,7 @@ instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8666,7 +8666,7 @@ instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8686,7 +8686,7 @@ instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8741,7 +8741,7 @@ instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%} - - ins_encode %{ -- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), - as_FloatRegister($op2$$reg), *($lbl$$label)); - %} - -@@ -8759,7 +8759,7 @@ instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%} - - ins_encode %{ -- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), - as_FloatRegister($op2$$reg), *($lbl$$label)); - %} - -@@ -9080,7 +9080,7 @@ instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9095,7 +9095,7 @@ instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ - format %{ "far_b$cmp $op1, $op2, 
$lbl\t#@far_cmpU_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9138,7 +9138,7 @@ instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9153,7 +9153,7 @@ instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9171,7 +9171,7 @@ instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9189,7 +9189,7 @@ instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9207,7 +9207,7 @@ instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9225,7 +9225,7 @@ instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9276,7 +9276,7 @@ instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%} - - ins_encode %{ -- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), - as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9292,7 +9292,7 @@ instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%} - - ins_encode %{ -- __ 
float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), - as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9616,7 +9616,7 @@ instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) - %} - - ins_encode %{ -- __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, - as_Register($op1$$reg), as_Register($op2$$reg), - as_Register($dst$$reg), as_Register($src$$reg)); - %} -@@ -9673,7 +9673,7 @@ instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) - %} - - ins_encode %{ -- __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, - as_Register($op1$$reg), as_Register($op2$$reg), - as_Register($dst$$reg), as_Register($src$$reg)); - %} -@@ -9691,7 +9691,7 @@ instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) - %} - - ins_encode %{ -- __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, - as_Register($op1$$reg), as_Register($op2$$reg), - as_Register($dst$$reg), as_Register($src$$reg)); - %} - -From 115cd21290080b157d0ca8b7080e66ebd814fbdb Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:15:18 +0800 -Subject: [PATCH 091/140] Revert JDK-8222297: IRT_ENTRY/IRT_LEAF etc are the - same as JRT && JDK-8263709: Cleanup THREAD/TRAPS/CHECK usage in JRT_ENTRY - routines - ---- - src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -index d93530d8564..776b0787238 100644 ---- a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -@@ -278,12 +278,12 @@ class SlowSignatureHandler - }; - - --JRT_ENTRY(address, -- InterpreterRuntime::slow_signature_handler(JavaThread* current, -+IRT_ENTRY(address, -+ InterpreterRuntime::slow_signature_handler(JavaThread* thread, - Method* method, - intptr_t* from, - intptr_t* to)) -- methodHandle m(current, (Method*)method); -+ methodHandle m(thread, (Method*)method); - assert(m->is_native(), "sanity check"); - - // handle arguments -@@ -292,4 +292,4 @@ JRT_ENTRY(address, - - // return result handler - return Interpreter::result_handler(m->result_type()); --JRT_END -+IRT_END - -From 6cbf43d5f095aef93ef0bf595f51019a03cc1989 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:20:06 +0800 -Subject: [PATCH 092/140] Revert JDK-8245289: Clean up offset code in - JavaClasses - ---- - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 18 +++++++++--------- - .../templateInterpreterGenerator_riscv.cpp | 2 +- - 2 files changed, 10 insertions(+), 10 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -index 4442b5991b1..e070b8096a6 100644 ---- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -53,7 +53,7 @@ void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_ - verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), - "MH argument is a Class"); - } -- __ ld(klass_reg, Address(klass_reg, 
java_lang_Class::klass_offset())); -+ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); - } - - #ifdef ASSERT -@@ -140,13 +140,13 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, - - // Load the invoker, as MH -> MH.form -> LF.vmentry - __ verify_oop(recv); -- __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2); -+ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); - __ verify_oop(method_temp); -- __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2); -+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); - __ verify_oop(method_temp); -- __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset())), temp2); -+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), temp2); - __ verify_oop(method_temp); -- __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), noreg, noreg); -+ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); - - if (VerifyMethodHandles && !for_compiler_entry) { - // make sure recv is already on stack -@@ -284,10 +284,10 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, - "MemberName required for invokeVirtual etc."); - } - -- Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); -- Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); -- Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); -- Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); -+ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); -+ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); -+ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); -+ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); - - Register temp1_recv_klass = temp1; - if (iid != vmIntrinsics::_linkToStatic) { -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index 8aea4eca048..ce6166030b4 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -894,7 +894,7 @@ address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { - - address entry = __ pc(); - -- const int referent_offset = java_lang_ref_Reference::referent_offset(); -+ const int referent_offset = java_lang_ref_Reference::referent_offset; - guarantee(referent_offset > 0, "referent offset not initialized"); - - Label slow_path; - -From 8c9b9f4246f4ede3c31f59749f9d4bc625f106b3 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:30:35 +0800 -Subject: [PATCH 093/140] Revert JDK-8242629: Remove references to deprecated - java.util.Observer and Observable - ---- - 
.../runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java | 2 -- - .../classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java | 2 -- - .../sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java | 2 -- - 3 files changed, 6 deletions(-) - -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java -index f2e224f28ee..5c2b6e0e3ea 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java -@@ -34,8 +34,6 @@ - import sun.jvm.hotspot.runtime.riscv64.*; - import sun.jvm.hotspot.types.*; - import sun.jvm.hotspot.utilities.*; --import sun.jvm.hotspot.utilities.Observable; --import sun.jvm.hotspot.utilities.Observer; - - public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { - private static AddressField lastJavaFPField; -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java -index df280005d72..e372bc5f7be 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java -@@ -34,8 +34,6 @@ - import sun.jvm.hotspot.runtime.*; - import sun.jvm.hotspot.types.*; - import sun.jvm.hotspot.utilities.*; --import sun.jvm.hotspot.utilities.Observable; --import sun.jvm.hotspot.utilities.Observer; - - /** Specialization of and implementation of abstract methods of the - Frame class for the riscv64 family of CPUs. 
*/ -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -index d0ad2b559a6..850758a7ed4 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -@@ -31,8 +31,6 @@ - import sun.jvm.hotspot.types.*; - import sun.jvm.hotspot.runtime.*; - import sun.jvm.hotspot.utilities.*; --import sun.jvm.hotspot.utilities.Observable; --import sun.jvm.hotspot.utilities.Observer; - - public class RISCV64JavaCallWrapper extends JavaCallWrapper { - private static AddressField lastJavaFPField; - -From 43f2a4fec6b4922fa8c187deda310ad636aeed2e Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:33:56 +0800 -Subject: [PATCH 094/140] Revert JDK-8256155: Allow multiple large page sizes - to be used on Linux - ---- - src/hotspot/os/linux/os_linux.cpp | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp -index 6f75e623a9a..7fc9588301b 100644 ---- a/src/hotspot/os/linux/os_linux.cpp -+++ b/src/hotspot/os/linux/os_linux.cpp -@@ -4078,7 +4078,8 @@ size_t os::Linux::find_large_page_size() { - IA64_ONLY(256 * M) - PPC_ONLY(4 * M) - S390_ONLY(1 * M) -- SPARC_ONLY(4 * M); -+ SPARC_ONLY(4 * M) -+ RISCV64_ONLY(2 * M); - #endif // ZERO - - FILE *fp = fopen("/proc/meminfo", "r"); - -From a93191be0155882a0f4d92bba4de9fdf4f508a4a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:38:53 +0800 -Subject: [PATCH 095/140] Revert JDK-8252204: AArch64: Implement SHA3 - accelerator/intrinsic - ---- - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 5 ----- - 1 file changed, 5 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -index d4b79162d84..50ee7edb708 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -82,11 +82,6 @@ void VM_Version::initialize() { - FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); - } - -- if (UseSHA3Intrinsics) { -- warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); -- FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); -- } -- - if (UseCRC32Intrinsics) { - warning("CRC32 intrinsics are not available on this CPU."); - FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); - -From 29acd4f1bb99e856418f7d9d3da4f205812b1663 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:43:23 +0800 -Subject: [PATCH 096/140] Revert JDK-8253717: Relocate stack overflow code out - of thread.hpp/cpp && JDK-8255766: Fix linux+arm64 build after 8254072 - ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 4 ++-- - src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp | 2 +- - 3 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index d175a62aeeb..d94074b4a3c 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1666,7 +1666,7 @@ void MacroAssembler::bang_stack_size(Register size, Register tmp) { - // was post-decremented.) 
Skip this address by starting at i=1, and - // touch a few more pages below. N.B. It is important to touch all - // the way down to and including i=StackShadowPages. -- for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { -+ for (int i = 0; i < (int)(JavaThread::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { - // this could be any sized move but this is can be a debugging crumb - // so the bigger the better. - sub(tmp, tmp, os::vm_page_size()); -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index ae414224c5b..dc3ac548d73 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1252,7 +1252,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - __ nop(); - - // Generate stack overflow check -- __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size())); -+ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); - - // Generate a new frame for the wrapper. - __ enter(); -@@ -1551,7 +1551,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - Label reguard; - Label reguard_done; - __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); -- __ mv(t1, StackOverflow::stack_guard_yellow_reserved_disabled); -+ __ mv(t1, JavaThread::stack_guard_yellow_reserved_disabled); - __ beq(t0, t1, reguard); - __ bind(reguard_done); - -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index ce6166030b4..e639fa7e12f 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -1248,7 +1248,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { - { - Label no_reguard; - __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); -- __ addi(t1, zr, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); -+ __ addi(t1, zr, (u1)JavaThread::stack_guard_yellow_reserved_disabled); - __ bne(t0, t1, no_reguard); - - __ pusha(); // only save smashed registers - -From 6fa17c662dd2488108809e77dcff921bb475813c Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:50:51 +0800 -Subject: [PATCH 097/140] Revert JDK-8258459: Decouple gc_globals.hpp from - globals.hpp - ---- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index 1f4409a9c9a..84b1afc7dc6 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -28,7 +28,6 @@ - #include "asm/macroAssembler.inline.hpp" - #include "gc/shared/barrierSetAssembler.hpp" - #include "gc/shared/collectedHeap.hpp" --#include "gc/shared/tlab_globals.hpp" - #include "interpreter/interp_masm.hpp" - #include "interpreter/interpreter.hpp" - #include "interpreter/interpreterRuntime.hpp" - -From bcc26e749ccc20db5a4ba51c2cf8740a908a8a74 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:56:58 +0800 -Subject: [PATCH 098/140] Revert JDK-8223136: Move compressed oops functions to - CompressedOops class - ---- - .../cpu/riscv/macroAssembler_riscv.cpp | 64 +++++++++---------- - .../cpu/riscv/macroAssembler_riscv.hpp | 1 - - src/hotspot/cpu/riscv/riscv.ad | 10 +-- - 3 
files changed, 37 insertions(+), 38 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index d94074b4a3c..becc1656358 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1318,10 +1318,10 @@ int MacroAssembler::patch_oop(address insn_addr, address o) { - void MacroAssembler::reinit_heapbase() { - if (UseCompressedOops) { - if (Universe::is_fully_initialized()) { -- mv(xheapbase, CompressedOops::ptrs_base()); -+ mv(xheapbase, Universe::narrow_ptrs_base()); - } else { - int32_t offset = 0; -- la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset); -+ la_patchable(xheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()), offset); - ld(xheapbase, Address(xheapbase, offset)); - } - } -@@ -1596,8 +1596,8 @@ void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, R - void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { - if (UseCompressedClassPointers) { - lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); -- if (CompressedKlassPointers::base() == NULL) { -- slli(tmp, tmp, CompressedKlassPointers::shift()); -+ if (Universe::narrow_klass_base() == NULL) { -+ slli(tmp, tmp, Universe::narrow_klass_shift()); - beq(trial_klass, tmp, L); - return; - } -@@ -1745,9 +1745,9 @@ void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, - // Algorithm must match CompressedOops::encode. - void MacroAssembler::encode_heap_oop(Register d, Register s) { - verify_oop(s, "broken oop in encode_heap_oop"); -- if (CompressedOops::base() == NULL) { -- if (CompressedOops::shift() != 0) { -- assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); -+ if (Universe::narrow_oop_base() == NULL) { -+ if (Universe::narrow_oop_shift() != 0) { -+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - srli(d, s, LogMinObjAlignmentInBytes); - } else { - mv(d, s); -@@ -1758,9 +1758,9 @@ void MacroAssembler::encode_heap_oop(Register d, Register s) { - bgez(d, notNull); - mv(d, zr); - bind(notNull); -- if (CompressedOops::shift() != 0) { -- assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); -- srli(d, d, CompressedOops::shift()); -+ if (Universe::narrow_oop_shift() != 0) { -+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); -+ srli(d, d, Universe::narrow_oop_shift()); - } - } - } -@@ -1799,9 +1799,9 @@ void MacroAssembler::decode_klass_not_null(Register r) { - void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { - assert(UseCompressedClassPointers, "should only be used for compressed headers"); - -- if (CompressedKlassPointers::base() == NULL) { -- if (CompressedKlassPointers::shift() != 0) { -- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ if (Universe::narrow_klass_base() == NULL) { -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - slli(dst, src, LogKlassAlignmentInBytes); - } else { - mv(dst, src); -@@ -1815,10 +1815,10 @@ void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register - } - - assert_different_registers(src, xbase); -- li(xbase, (uintptr_t)CompressedKlassPointers::base()); -+ li(xbase, 
(uintptr_t)Universe::narrow_klass_base()); - -- if (CompressedKlassPointers::shift() != 0) { -- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - assert_different_registers(t0, xbase); - shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); - } else { -@@ -1835,9 +1835,9 @@ void MacroAssembler::encode_klass_not_null(Register r) { - void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { - assert(UseCompressedClassPointers, "should only be used for compressed headers"); - -- if (CompressedKlassPointers::base() == NULL) { -- if (CompressedKlassPointers::shift() != 0) { -- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ if (Universe::narrow_klass_base() == NULL) { -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - srli(dst, src, LogKlassAlignmentInBytes); - } else { - mv(dst, src); -@@ -1845,8 +1845,8 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register - return; - } - -- if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && -- CompressedKlassPointers::shift() == 0) { -+ if (((uint64_t)(uintptr_t)Universe::narrow_klass_base() & 0xffffffff) == 0 && -+ Universe::narrow_klass_shift() == 0) { - zero_extend(dst, src, 32); - return; - } -@@ -1857,10 +1857,10 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register - } - - assert_different_registers(src, xbase); -- li(xbase, (intptr_t)CompressedKlassPointers::base()); -+ li(xbase, (intptr_t)Universe::narrow_klass_base()); - sub(dst, src, xbase); -- if (CompressedKlassPointers::shift() != 0) { -- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - srli(dst, dst, LogKlassAlignmentInBytes); - } - if (xbase == xheapbase) { -@@ -1878,22 +1878,22 @@ void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { - // Cannot assert, unverified entry point counts instructions (see .ad file) - // vtableStubs also counts instructions in pd_code_size_limit. - // Also do not verify_oop as this is called by verify_oop. 
-- if (CompressedOops::shift() != 0) { -- assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); -+ if (Universe::narrow_oop_shift() != 0) { -+ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - slli(dst, src, LogMinObjAlignmentInBytes); -- if (CompressedOops::base() != NULL) { -+ if (Universe::narrow_oop_base() != NULL) { - add(dst, xheapbase, dst); - } - } else { -- assert(CompressedOops::base() == NULL, "sanity"); -+ assert(Universe::narrow_oop_base() == NULL, "sanity"); - mv(dst, src); - } - } - - void MacroAssembler::decode_heap_oop(Register d, Register s) { -- if (CompressedOops::base() == NULL) { -- if (CompressedOops::shift() != 0 || d != s) { -- slli(d, s, CompressedOops::shift()); -+ if (Universe::narrow_oop_base() == NULL) { -+ if (Universe::narrow_oop_shift() != 0 || d != s) { -+ slli(d, s, Universe::narrow_oop_shift()); - } - } else { - Label done; -@@ -3004,7 +3004,7 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { - InstructionMark im(this); - RelocationHolder rspec = metadata_Relocation::spec(index); - code_section()->relocate(inst_mark(), rspec); -- narrowKlass nk = CompressedKlassPointers::encode(k); -+ narrowKlass nk = Klass::encode_klass(k); - li32(dst, nk); - zero_extend(dst, dst, 32); - } -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 2ef28771e2e..953bca3cbd8 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -29,7 +29,6 @@ - - #include "asm/assembler.hpp" - #include "metaprogramming/enableIf.hpp" --#include "oops/compressedOops.hpp" - - // MacroAssembler extends Assembler by frequently used macros. - // -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 2e7eed8fb52..24214964243 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1407,7 +1407,7 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const - st->print_cr("# MachUEPNode"); - if (UseCompressedClassPointers) { - st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); -- if (CompressedKlassPointers::shift() != 0) { -+ if (Universe::narrow_klass_shift() != 0) { - st->print_cr("\tdecode_klass_not_null t0, t0"); - } - } else { -@@ -3255,7 +3255,7 @@ operand indOffL(iRegP reg, immLOffset off) - - operand indirectN(iRegN reg) - %{ -- predicate(CompressedOops::shift() == 0); -+ predicate(Universe::narrow_oop_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(DecodeN reg); - op_cost(0); -@@ -3270,7 +3270,7 @@ operand indirectN(iRegN reg) - - operand indOffIN(iRegN reg, immIOffset off) - %{ -- predicate(CompressedOops::shift() == 0); -+ predicate(Universe::narrow_oop_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (DecodeN reg) off); - op_cost(0); -@@ -3285,7 +3285,7 @@ operand indOffIN(iRegN reg, immIOffset off) - - operand indOffLN(iRegN reg, immLOffset off) - %{ -- predicate(CompressedOops::shift() == 0); -+ predicate(Universe::narrow_oop_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (DecodeN reg) off); - op_cost(0); -@@ -7947,7 +7947,7 @@ instruct convP2I(iRegINoSp dst, iRegP src) %{ - // in case of 32bit oops (heap < 4Gb). 
- instruct convN2I(iRegINoSp dst, iRegN src) - %{ -- predicate(CompressedOops::shift() == 0); -+ predicate(Universe::narrow_oop_shift() == 0); - match(Set dst (ConvL2I (CastP2X (DecodeN src)))); - - ins_cost(ALU_COST); - -From 81d8ea9077484f1dd20033390cbd3c1844b1b966 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 17:11:20 +0800 -Subject: [PATCH 099/140] Revert JDK-8247912: Make narrowOop a scoped enum - ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index becc1656358..e2841c28c37 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1305,7 +1305,7 @@ int MacroAssembler::patch_oop(address insn_addr, address o) { - // instruction. - if (NativeInstruction::is_li32_at(insn_addr)) { - // Move narrow OOP -- uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); -+ narrowOop n = CompressedOops::encode((oop)o); - return patch_imm_in_li32(insn_addr, (int32_t)n); - } else if (NativeInstruction::is_movptr_at(insn_addr)) { - // Move wide OOP - -From f980e03cb17804ff72958dd13505058048c04da8 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 17:20:05 +0800 -Subject: [PATCH 100/140] Revert JDK-8260467: Move well-known classes from - systemDictionary.hpp to vmClasses.hpp - ---- - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 11 +++++------ - src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 4 ++-- - 2 files changed, 7 insertions(+), 8 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -index e070b8096a6..fd907f77afb 100644 ---- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -27,7 +27,6 @@ - #include "precompiled.hpp" - #include "asm/macroAssembler.hpp" - #include "classfile/javaClasses.inline.hpp" --#include "classfile/vmClasses.hpp" - #include "interpreter/interpreter.hpp" - #include "interpreter/interpreterRuntime.hpp" - #include "memory/allocation.inline.hpp" -@@ -50,7 +49,7 @@ - void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { - assert_cond(_masm != NULL); - if (VerifyMethodHandles) { -- verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), -+ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), - "MH argument is a Class"); - } - __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); -@@ -68,11 +67,11 @@ static int check_nonzero(const char* xname, int x) { - - #ifdef ASSERT - void MethodHandles::verify_klass(MacroAssembler* _masm, -- Register obj, vmClassID klass_id, -+ Register obj, SystemDictionary::WKID klass_id, - const char* error_message) { - assert_cond(_masm != NULL); -- InstanceKlass** klass_addr = vmClasses::klass_addr_at(klass_id); -- Klass* klass = vmClasses::klass_at(klass_id); -+ InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); -+ Klass* klass = SystemDictionary::well_known_klass(klass_id); - Register temp = t1; - Register temp2 = t0; // used by MacroAssembler::cmpptr - Label L_ok, L_bad; -@@ -280,7 +279,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, - // The method is a member invoker used by direct method handles. 
- if (VerifyMethodHandles) { - // make sure the trailing argument really is a MemberName (caller responsibility) -- verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName), -+ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), - "MemberName required for invokeVirtual etc."); - } - -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -index f73aba29d67..65493eba764 100644 ---- a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -@@ -36,11 +36,11 @@ enum /* platform_dependent_constants */ { - static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); - - static void verify_klass(MacroAssembler* _masm, -- Register obj, vmClassID klass_id, -+ Register obj, SystemDictionary::WKID klass_id, - const char* error_message = "wrong klass") NOT_DEBUG_RETURN; - - static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { -- verify_klass(_masm, mh_reg, VM_CLASS_ID(java_lang_invoke_MethodHandle), -+ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), - "reference is a MH"); - } - - -From 2c68b064100b5abaca80926e213280ea82ff161a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 17:32:15 +0800 -Subject: [PATCH 101/140] Revert JDK-8268858: Determine register pressure - automatically by the number of available registers for allocation - ---- - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 ++ - src/hotspot/cpu/riscv/riscv.ad | 27 ---------------------- - 2 files changed, 2 insertions(+), 27 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -index 6c301cdae04..33d78fb2f6f 100644 ---- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -44,8 +44,10 @@ define_pd_global(intx, CompileThreshold, 10000); - - define_pd_global(intx, OnStackReplacePercentage, 140); - define_pd_global(intx, ConditionalMoveLimit, 0); -+define_pd_global(intx, FLOATPRESSURE, 32); - define_pd_global(intx, FreqInlineSize, 325); - define_pd_global(intx, MinJumpTableSize, 10); -+define_pd_global(intx, INTPRESSURE, 24); - define_pd_global(intx, InteriorEntryAlignment, 16); - define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); - define_pd_global(intx, LoopUnrollLimit, 60); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 24214964243..c5e0ae23029 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1719,33 +1719,6 @@ bool Matcher::is_spillable_arg(int reg) - return can_be_java_arg(reg); - } - --uint Matcher::int_pressure_limit() --{ -- // A derived pointer is live at CallNode and then is flagged by RA -- // as a spilled LRG. Spilling heuristics(Spill-USE) explicitly skip -- // derived pointers and lastly fail to spill after reaching maximum -- // number of iterations. Lowering the default pressure threshold to -- // (_NO_SPECIAL_REG32_mask.Size() minus 1) forces CallNode to become -- // a high register pressure area of the code so that split_DEF can -- // generate DefinitionSpillCopy for the derived pointer. 
-- uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.Size() - 1; -- if (!PreserveFramePointer) { -- // When PreserveFramePointer is off, frame pointer is allocatable, -- // but different from other SOC registers, it is excluded from -- // fatproj's mask because its save type is No-Save. Decrease 1 to -- // ensure high pressure at fatproj when PreserveFramePointer is off. -- // See check_pressure_at_fatproj(). -- default_int_pressure_threshold--; -- } -- return (INTPRESSURE == -1) ? default_int_pressure_threshold : INTPRESSURE; --} -- --uint Matcher::float_pressure_limit() --{ -- // _FLOAT_REG_mask is generated by adlc from the float_reg register class. -- return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.Size() : FLOATPRESSURE; --} -- - bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { - return false; - } - -From 932ebd6238ea7703dc3164e4506af332f6847592 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 17:51:12 +0800 -Subject: [PATCH 102/140] Revert JDK-8276563: Undefined Behaviour in class - Assembler && 8257882: Implement linkToNative intrinsic on AArch64 (the - register part) - ---- - .../cpu/riscv/globalDefinitions_riscv.hpp | 2 - - src/hotspot/cpu/riscv/register_riscv.cpp | 4 - - src/hotspot/cpu/riscv/register_riscv.hpp | 123 +++++++++++++----- - 3 files changed, 91 insertions(+), 38 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -index 2936837d951..ffd420da024 100644 ---- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -47,6 +47,4 @@ const bool CCallingConventionRequiresIntsAsLongs = false; - - #define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false - --#define USE_POINTERS_TO_REGISTER_IMPL_ARRAY -- - #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp -index 96cf1996a83..ef60cb3bb05 100644 ---- a/src/hotspot/cpu/riscv/register_riscv.cpp -+++ b/src/hotspot/cpu/riscv/register_riscv.cpp -@@ -26,10 +26,6 @@ - #include "precompiled.hpp" - #include "register_riscv.hpp" - --REGISTER_IMPL_DEFINITION(Register, RegisterImpl, RegisterImpl::number_of_registers); --REGISTER_IMPL_DEFINITION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); --REGISTER_IMPL_DEFINITION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); -- - const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * - RegisterImpl::max_slots_per_register; - -diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp -index d697751f55f..f64a06eb89f 100644 ---- a/src/hotspot/cpu/riscv/register_riscv.hpp -+++ b/src/hotspot/cpu/riscv/register_riscv.hpp -@@ -47,13 +47,13 @@ typedef VMRegImpl* VMReg; - - // Use Register as shortcut - class RegisterImpl; --typedef const RegisterImpl* Register; -+typedef RegisterImpl* Register; - --inline constexpr Register as_Register(int encoding); -+inline Register as_Register(int encoding) { -+ return (Register)(intptr_t) encoding; -+} - - class RegisterImpl: public AbstractRegisterImpl { -- static constexpr Register first(); -- - public: - enum { - number_of_registers = 32, -@@ -66,16 +66,16 @@ class RegisterImpl: public AbstractRegisterImpl { - }; - - // derived registers, offsets, and addresses -- const Register successor() const { return this + 1; } -+ const Register successor() const { return 
as_Register(encoding() + 1); } - - // construction -- inline friend constexpr Register as_Register(int encoding); -+ inline friend Register as_Register(int encoding); - - VMReg as_VMReg() const; - - // accessors - int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } -- int encoding_nocheck() const { return this - first(); } -+ int encoding_nocheck() const { return (intptr_t)this; } - bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } - const char* name() const; - -@@ -93,9 +93,11 @@ class RegisterImpl: public AbstractRegisterImpl { - return encoding_nocheck() >= compressed_register_base && - encoding_nocheck() <= compressed_register_top; - } --}; - --REGISTER_IMPL_DECLARATION(Register, RegisterImpl, RegisterImpl::number_of_registers); -+ // Return the bit which represents this register. This is intended -+ // to be ORed into a bitmask: for usage see class RegSet below. -+ uint64_t bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } -+}; - - // The integer registers of the RISCV architecture - -@@ -136,14 +138,14 @@ CONSTANT_REGISTER_DECLARATION(Register, x31, (31)); - - // Use FloatRegister as shortcut - class FloatRegisterImpl; --typedef const FloatRegisterImpl* FloatRegister; -+typedef FloatRegisterImpl* FloatRegister; - --inline constexpr FloatRegister as_FloatRegister(int encoding); -+inline FloatRegister as_FloatRegister(int encoding) { -+ return (FloatRegister)(intptr_t) encoding; -+} - - // The implementation of floating point registers for the architecture - class FloatRegisterImpl: public AbstractRegisterImpl { -- static constexpr FloatRegister first(); -- - public: - enum { - number_of_registers = 32, -@@ -155,18 +157,16 @@ class FloatRegisterImpl: public AbstractRegisterImpl { - }; - - // construction -- inline friend constexpr FloatRegister as_FloatRegister(int encoding); -+ inline friend FloatRegister as_FloatRegister(int encoding); - - VMReg as_VMReg() const; - - // derived registers, offsets, and addresses -- FloatRegister successor() const { -- return as_FloatRegister((encoding() + 1) % (unsigned)number_of_registers); -- } -+ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } - - // accessors - int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } -- int encoding_nocheck() const { return this - first(); } -+ int encoding_nocheck() const { return (intptr_t)this; } - int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } - const char* name() const; - -@@ -186,8 +186,6 @@ class FloatRegisterImpl: public AbstractRegisterImpl { - } - }; - --REGISTER_IMPL_DECLARATION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); -- - // The float registers of the RISCV architecture - - CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); -@@ -227,14 +225,14 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); - - // Use VectorRegister as shortcut - class VectorRegisterImpl; --typedef const VectorRegisterImpl* VectorRegister; -+typedef VectorRegisterImpl* VectorRegister; - --inline constexpr VectorRegister as_VectorRegister(int encoding); -+inline VectorRegister as_VectorRegister(int encoding) { -+ return (VectorRegister)(intptr_t) encoding; -+} - - // The implementation of vector registers for RVV - class VectorRegisterImpl: public AbstractRegisterImpl { -- static constexpr VectorRegister first(); -- - public: - enum { - number_of_registers = 32, -@@ -242,23 
+240,21 @@ class VectorRegisterImpl: public AbstractRegisterImpl { - }; - - // construction -- inline friend constexpr VectorRegister as_VectorRegister(int encoding); -+ inline friend VectorRegister as_VectorRegister(int encoding); - - VMReg as_VMReg() const; - - // derived registers, offsets, and addresses -- VectorRegister successor() const { return this + 1; } -+ VectorRegister successor() const { return as_VectorRegister(encoding() + 1); } - - // accessors - int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } -- int encoding_nocheck() const { return this - first(); } -+ int encoding_nocheck() const { return (intptr_t)this; } - bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } - const char* name() const; - - }; - --REGISTER_IMPL_DECLARATION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); -- - // The vector registers of RVV - CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1)); - -@@ -315,8 +311,71 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { - static const int max_fpr; - }; - --typedef AbstractRegSet RegSet; --typedef AbstractRegSet FloatRegSet; --typedef AbstractRegSet VectorRegSet; -+// A set of registers -+class RegSet { -+ uint32_t _bitset; ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } + -+ RegSet(uint32_t bitset) : _bitset(bitset) { } ++ // ++ // NOTE: some of this code is (unfortunately) duplicated RISCV64CurrentFrameGuess ++ // + -+public: ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } + -+ RegSet() : _bitset(0) { } ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + -+ RegSet(Register r1) : _bitset(r1->bit()) { } ++ // The return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + -+ RegSet operator+(const RegSet aSet) const { -+ RegSet result(_bitset | aSet._bitset); -+ return result; ++ // This is the saved value of FP which may or may not really be an FP. ++ // It is only an FP if the sender is an interpreter frame. ++ Address savedFPAddr = senderSP.addOffsetTo(-2 * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of FP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. 
++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new RISCV64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); + } + -+ RegSet operator-(const RegSet aSet) const { -+ RegSet result(_bitset & ~aSet._bitset); -+ return result; ++ protected boolean hasSenderPD() { ++ return true; + } + -+ RegSet &operator+=(const RegSet aSet) { -+ *this = *this + aSet; -+ return *this; ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); + } + -+ RegSet &operator-=(const RegSet aSet) { -+ *this = *this - aSet; -+ return *this; ++ public Address getLink() { ++ try { ++ if (DEBUG) { ++ System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) ++ + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); ++ } ++ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); ++ } catch (Exception e) { ++ if (DEBUG) ++ System.out.println("Returning null"); ++ return null; ++ } ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ // return address of param, zero origin index. ++ public Address getNativeParamAddr(int idx) { ++ return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); + } + -+ static RegSet of(Register r1) { -+ return RegSet(r1); ++ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); + } + -+ static RegSet of(Register r1, Register r2) { -+ return of(r1) + r2; ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); + } + -+ static RegSet of(Register r1, Register r2, Register r3) { -+ return of(r1, r2) + r3; ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. ++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); + } + -+ static RegSet of(Register r1, Register r2, Register r3, Register r4) { -+ return of(r1, r2, r3) + r4; ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); + } + -+ static RegSet range(Register start, Register end) { -+ uint32_t bits = ~0; -+ bits <<= start->encoding(); -+ bits <<= 31 - end->encoding(); -+ bits >>= 31 - end->encoding(); ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) + -+ return RegSet(bits); ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); + } + -+ uint32_t bits() const { return _bitset; } ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } + -+private: ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } + -+ Register first() { -+ uint32_t first = _bitset & -_bitset; -+ return first ? 
as_Register(exact_log2(first)) : noreg; ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); + } -+}; - - #endif // CPU_RISCV_REGISTER_RISCV_HPP - -From 9c85aa8d3387d795f9c2f4795ffc7f9d7f814d92 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 19:24:49 +0800 -Subject: [PATCH 103/140] Revert JDK-8240363: Refactor Compile::Output() to its - own Phase - ---- - .../cpu/riscv/macroAssembler_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/riscv.ad | 20 +++++++++---------- - 2 files changed, 11 insertions(+), 11 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index e2841c28c37..656334f326b 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -3027,7 +3027,7 @@ address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { - CompileTask* task = ciEnv::current()->task(); - in_scratch_emit_size = - (task != NULL && is_c2_compile(task->comp_level()) && -- Compile::current()->output()->in_scratch_emit_size()); -+ Compile::current()->in_scratch_emit_size()); - #endif - if (!in_scratch_emit_size) { - address stub = emit_trampoline_stub(offset(), entry.target()); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index c5e0ae23029..d736750d02d 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1029,7 +1029,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { - //============================================================================= - const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; - --int ConstantTable::calculate_table_base_offset() const { -+int Compile::ConstantTable::calculate_table_base_offset() const { - return 0; // absolute addressing, no offset - } - -@@ -1058,9 +1058,9 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - assert_cond(st != NULL && ra_ != NULL); - Compile* C = ra_->C; - -- int framesize = C->output()->frame_slots() << LogBytesPerInt; -+ int framesize = C->frame_slots() << LogBytesPerInt; - -- if (C->output()->need_stack_bang(framesize)) { -+ if (C->need_stack_bang(framesize)) { - st->print("# stack bang size=%d\n\t", framesize); - } - -@@ -1077,7 +1077,7 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - MacroAssembler _masm(&cbuf); - - // n.b. 
frame size includes space for return pc and fp -- const int framesize = C->output()->frame_size_in_bytes(); -+ const int framesize = C->frame_size_in_bytes(); - - // insert a nop at the start of the prolog so we can patch in a - // branch if we need to invalidate the method later -@@ -1085,8 +1085,8 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - - assert_cond(C != NULL); - -- int bangsize = C->output()->bang_size_in_bytes(); -- if (C->output()->need_stack_bang(bangsize)) { -+ int bangsize = C->bang_size_in_bytes(); -+ if (C->need_stack_bang(bangsize)) { - __ generate_stack_overflow_check(bangsize); - } - -@@ -1096,12 +1096,12 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - Unimplemented(); - } - -- C->output()->set_frame_complete(cbuf.insts_size()); -+ C->set_frame_complete(cbuf.insts_size()); - - if (C->has_mach_constant_base_node()) { - // NOTE: We set the table base offset here because users might be - // emitted before MachConstantBaseNode. -- ConstantTable& constant_table = C->output()->constant_table(); -+ Compile::ConstantTable& constant_table = C->constant_table(); - constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); - } - } -@@ -1125,7 +1125,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - assert_cond(st != NULL && ra_ != NULL); - Compile* C = ra_->C; - assert_cond(C != NULL); -- int framesize = C->output()->frame_size_in_bytes(); -+ int framesize = C->frame_size_in_bytes(); - - st->print("# pop frame %d\n\t", framesize); - -@@ -1152,7 +1152,7 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - Compile* C = ra_->C; - MacroAssembler _masm(&cbuf); - assert_cond(C != NULL); -- int framesize = C->output()->frame_size_in_bytes(); -+ int framesize = C->frame_size_in_bytes(); - - __ remove_frame(framesize); - - -From 3a58114310a56ebca04ba44b4883d205096eb844 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 19:36:09 +0800 -Subject: [PATCH 104/140] Revert RotateLeft && RotateRight matching rules - ---- - src/hotspot/cpu/riscv/riscv.ad | 2 - - src/hotspot/cpu/riscv/riscv_b.ad | 76 -------------------------------- - 2 files changed, 78 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index d736750d02d..1e6495692da 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1505,8 +1505,6 @@ const bool Matcher::match_rule_supported(int opcode) { - case Op_PopCountL: - return UsePopCountInstruction; - -- case Op_RotateRight: -- case Op_RotateLeft: - case Op_CountLeadingZerosI: - case Op_CountLeadingZerosL: - case Op_CountTrailingZerosI: -diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad -index 4488c1c4031..b9e04c432e1 100644 ---- a/src/hotspot/cpu/riscv/riscv_b.ad -+++ b/src/hotspot/cpu/riscv/riscv_b.ad -@@ -25,82 +25,6 @@ - - // RISCV Bit-Manipulation Extension Architecture Description File - --instruct rorI_imm_rvb(iRegINoSp dst, iRegI src, immI shift) %{ -- predicate(UseRVB); -- match(Set dst (RotateRight src shift)); -- -- format %{ "roriw $dst, $src, ($shift & 0x1f)\t#@rorI_imm_rvb" %} -- -- ins_cost(ALU_COST); -- ins_encode %{ -- __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x1f); -- %} -- -- ins_pipe(ialu_reg_shift); --%} -- --instruct rorL_imm_rvb(iRegLNoSp dst, iRegL src, immI shift) %{ -- predicate(UseRVB); -- match(Set dst (RotateRight src shift)); -- -- format %{ "rori $dst, $src, 
($shift & 0x3f)\t#@rorL_imm_rvb" %} -- -- ins_cost(ALU_COST); -- ins_encode %{ -- __ rori(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x3f); -- %} -- -- ins_pipe(ialu_reg_shift); --%} -- --instruct rorI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ -- predicate(UseRVB); -- match(Set dst (RotateRight src shift)); -- -- format %{ "rorw $dst, $src, $shift\t#@rorI_reg_rvb" %} -- ins_cost(ALU_COST); -- ins_encode %{ -- __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -- %} -- ins_pipe(ialu_reg_reg); --%} -- --instruct rorL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ -- predicate(UseRVB); -- match(Set dst (RotateRight src shift)); -- -- format %{ "ror $dst, $src, $shift\t#@rorL_reg_rvb" %} -- ins_cost(ALU_COST); -- ins_encode %{ -- __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -- %} -- ins_pipe(ialu_reg_reg); --%} -- --instruct rolI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ -- predicate(UseRVB); -- match(Set dst (RotateLeft src shift)); -- -- format %{ "rolw $dst, $src, $shift\t#@rolI_reg_rvb" %} -- ins_cost(ALU_COST); -- ins_encode %{ -- __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -- %} -- ins_pipe(ialu_reg_reg); --%} -- --instruct rolL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ -- predicate(UseRVB); -- match(Set dst (RotateLeft src shift)); -- -- format %{ "rol $dst, $src, $shift\t#@rolL_reg_rvb" %} -- ins_cost(ALU_COST); -- ins_encode %{ -- __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -- %} -- ins_pipe(ialu_reg_reg); --%} -- - // Convert oop into int for vectors alignment masking - instruct convP2I_rvb(iRegINoSp dst, iRegP src) %{ - predicate(UseRVB); - -From 21577388eda0218eeb4b28bc71ecf5737d40639e Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 19:49:28 +0800 -Subject: [PATCH 105/140] Revert JDK-8230565: ZGC: Redesign C2 load barrier to - expand on the MachNode level - ---- - src/hotspot/cpu/riscv/riscv.ad | 14 ++++---------- - 1 file changed, 4 insertions(+), 10 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 1e6495692da..533eaf843e3 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -4324,7 +4324,6 @@ instruct loadRange(iRegINoSp dst, memory mem) - instruct loadP(iRegPNoSp dst, memory mem) - %{ - match(Set dst (LoadP mem)); -- predicate(n->as_Load()->barrier_data() == 0); - - ins_cost(LOAD_COST); - format %{ "ld $dst, $mem\t# ptr, #@loadP" %} -@@ -5060,8 +5059,6 @@ instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoS - - instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) - %{ -- predicate(n->as_LoadStore()->barrier_data() == 0); -- - match(Set res (CompareAndSwapP mem (Binary oldval newval))); - - ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); -@@ -5181,7 +5178,7 @@ instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegL - - instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) - %{ -- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); -+ predicate(needs_acquiring_load_reserved(n)); - - match(Set res (CompareAndSwapP mem (Binary oldval newval))); - -@@ -5327,7 +5324,6 @@ instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN ne - - instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP 
oldval, iRegP newval) - %{ -- predicate(n->as_LoadStore()->barrier_data() == 0); - match(Set res (CompareAndExchangeP mem (Binary oldval newval))); - - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); -@@ -5462,7 +5458,7 @@ instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN - - instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) - %{ -- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); -+ predicate(needs_acquiring_load_reserved(n)); - - match(Set res (CompareAndExchangeP mem (Binary oldval newval))); - -@@ -5592,7 +5588,6 @@ instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN ne - - instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) - %{ -- predicate(n->as_LoadStore()->barrier_data() == 0); - match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); - - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); -@@ -5731,7 +5726,7 @@ instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN - - instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) - %{ -- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); -+ predicate(needs_acquiring_load_reserved(n)); - - match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); - -@@ -5798,7 +5793,6 @@ instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) - - instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) - %{ -- predicate(n->as_LoadStore()->barrier_data() == 0); - match(Set prev (GetAndSetP mem newv)); - - ins_cost(ALU_COST); -@@ -5865,7 +5859,7 @@ instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) - - instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) - %{ -- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); -+ predicate(needs_acquiring_load_reserved(n)); - - match(Set prev (GetAndSetP mem newv)); - - -From 4673921af60f4779d4322256f92bb60a850cb035 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 19:51:09 +0800 -Subject: [PATCH 106/140] Revert JDK-8252990: Intrinsify Unsafe.storeStoreFence - ---- - src/hotspot/cpu/riscv/riscv.ad | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 533eaf843e3..5fa3b85c001 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -7537,7 +7537,6 @@ instruct membar_release() %{ - - instruct membar_storestore() %{ - match(MemBarStoreStore); -- match(StoreStoreFence); - ins_cost(ALU_COST); - - format %{ "MEMBAR-store-store\t#@membar_storestore" %} - -From e254a03e87ffc6d8f563dbd7db1b607a95657263 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 19:54:02 +0800 -Subject: [PATCH 107/140] Revert JDK-8255150: Add utility methods to check long - indexes and ranges && JDK-8252372: Check if cloning is required to move loads - out of loops in PhaseIdealLoop::split_if_with_blocks_post() - ---- - src/hotspot/cpu/riscv/riscv.ad | 33 --------------------------------- - 1 file changed, 33 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 5fa3b85c001..388e65f623d 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -7621,17 +7621,6 @@ instruct castPP(iRegPNoSp dst) - ins_pipe(pipe_class_empty); - %} - --instruct castLL(iRegL dst) 
--%{ -- match(Set dst (CastLL dst)); -- -- size(0); -- format %{ "# castLL of $dst, #@castLL" %} -- ins_encode(/* empty encoding */); -- ins_cost(0); -- ins_pipe(pipe_class_empty); --%} -- - instruct castII(iRegI dst) - %{ - match(Set dst (CastII dst)); -@@ -7654,28 +7643,6 @@ instruct checkCastPP(iRegPNoSp dst) - ins_pipe(pipe_class_empty); - %} - --instruct castFF(fRegF dst) --%{ -- match(Set dst (CastFF dst)); -- -- size(0); -- format %{ "# castFF of $dst" %} -- ins_encode(/* empty encoding */); -- ins_cost(0); -- ins_pipe(pipe_class_empty); --%} -- --instruct castDD(fRegD dst) --%{ -- match(Set dst (CastDD dst)); -- -- size(0); -- format %{ "# castDD of $dst" %} -- ins_encode(/* empty encoding */); -- ins_cost(0); -- ins_pipe(pipe_class_empty); --%} -- - // ============================================================================ - // Convert Instructions - - -From 2c1820363992d09ef0cd2ed2553c04e0f7afd91f Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 20:02:14 +0800 -Subject: [PATCH 108/140] Revert reset_label part of JDK-8248411: [aarch64] - Insufficient error handling when CodeBuffer is exhausted - ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 14 +++++--------- - 2 files changed, 6 insertions(+), 10 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 656334f326b..37ccf132986 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -3784,7 +3784,7 @@ address MacroAssembler::zero_words(Register ptr, Register cnt) - if (StubRoutines::riscv::complete()) { - address tpc = trampoline_call(zero_blocks); - if (tpc == NULL) { -- DEBUG_ONLY(reset_labels(around)); -+ DEBUG_ONLY(reset_labels1(around)); - postcond(pc() == badAddress); - return NULL; - } -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 953bca3cbd8..45ffc663963 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -815,17 +815,13 @@ class MacroAssembler: public Assembler { - private: - - #ifdef ASSERT -- // Template short-hand support to clean-up after a failed call to trampoline -+ // Macro short-hand support to clean-up after a failed call to trampoline - // call generation (see trampoline_call() below), when a set of Labels must - // be reset (before returning). -- template -- void reset_labels(Label& lbl, More&... 
more) { -- lbl.reset(); reset_labels(more...); -- } -- template -- void reset_labels(Label& lbl) { -- lbl.reset(); -- } -+#define reset_labels1(L1) L1.reset() -+#define reset_labels2(L1, L2) L1.reset(); L2.reset() -+#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) -+#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) - #endif - void repne_scan(Register addr, Register value, Register count, Register tmp); - - -From 014972a0778b8c5568fae9e92d286b634cb44674 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 20:30:49 +0800 -Subject: [PATCH 109/140] Revert JDK-8242289: C2: Support platform-specific - node cloning in Matcher - ---- - src/hotspot/cpu/riscv/riscv.ad | 12 +----------- - 1 file changed, 1 insertion(+), 11 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 388e65f623d..7cd6c2995ba 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1765,20 +1765,10 @@ bool size_fits_all_mem_uses(AddPNode* addp, int shift) { - - const bool Matcher::convi2l_type_required = false; - --// Should the Matcher clone input 'm' of node 'n'? --bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { -- assert_cond(m != NULL); -- if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) -- mstack.push(m, Visit); // m = ShiftCntV -- return true; -- } -- return false; --} -- - // Should the Matcher clone shifts on addressing modes, expecting them - // to be subsumed into complex addressing expressions or compute them - // into registers? --bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { -+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { - return clone_base_plus_offset_address(m, mstack, address_visited); - } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new RISCV64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ AddressOps.lt(addr, getSP()); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ System.out.println("-----------------------"); ++ for (Address addr = getSP(); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java +new file mode 100644 +index 0000000000..850758a7ed +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.riscv64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RISCV64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public RISCV64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java +new file mode 100644 +index 0000000000..4aeb1c6f55 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class RISCV64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public RISCV64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected RISCV64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ RISCV64RegisterMap retval = new RISCV64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +index 7d7a6107ca..6552ce255f 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -54,7 +54,7 @@ public class PlatformInfo { - -From d15e155e9b84f4789cfbb1cf75382be859b0a8ca Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 20:40:00 +0800 -Subject: [PATCH 110/140] Revert JDK-8255782: Turn UseTLAB and ResizeTLAB from - product_pd to product, defaulting to "true" - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 2 ++ - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 ++ - 2 files changed, 4 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index 8f2f4e0e81d..25e00bea901 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -34,6 +34,8 @@ + public static boolean knownCPU(String cpu) { + final String[] KNOWN = +- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; ++ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "riscv64"}; - #ifndef TIERED - define_pd_global(bool, BackgroundCompilation, true ); -+define_pd_global(bool, UseTLAB, true ); -+define_pd_global(bool, ResizeTLAB, true ); - define_pd_global(bool, InlineIntrinsics, true ); - define_pd_global(bool, PreferInterpreterNativeStubs, false); - define_pd_global(bool, ProfileTraps, false); -diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -index 33d78fb2f6f..3da1f1c6d86 100644 ---- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -33,6 +33,8 @@ - // (see c2_globals.hpp). Alpha-sorted. + for(String s : KNOWN) { + if(s.equals(cpu)) +diff --git a/test/hotspot/jtreg/compiler/c2/TestBit.java b/test/hotspot/jtreg/compiler/c2/TestBit.java +index 7805918c28..823b9f39db 100644 +--- a/test/hotspot/jtreg/compiler/c2/TestBit.java ++++ b/test/hotspot/jtreg/compiler/c2/TestBit.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -34,7 +34,7 @@ import jdk.test.lib.process.ProcessTools; + * + * @run driver compiler.c2.TestBit + * +- * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" ++ * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" | os.arch == "riscv64" + * @requires vm.debug == true & vm.compiler2.enabled + */ + public class TestBit { +@@ -54,7 +54,8 @@ public class TestBit { + String expectedTestBitInstruction = + "ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" : + "aarch64".equals(System.getProperty("os.arch")) ? "tb" : +- "amd64".equals(System.getProperty("os.arch")) ? "test" : null; ++ "amd64".equals(System.getProperty("os.arch")) ? "test" : ++ "riscv64".equals(System.getProperty("os.arch")) ? "andi" : null; - define_pd_global(bool, BackgroundCompilation, true); -+define_pd_global(bool, UseTLAB, true); -+define_pd_global(bool, ResizeTLAB, true); - define_pd_global(bool, CICompileOSR, true); - define_pd_global(bool, InlineIntrinsics, true); - define_pd_global(bool, PreferInterpreterNativeStubs, false); - -From f3fa0cfa987743b4ee83332ddf71add421561908 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 20:49:57 +0800 -Subject: [PATCH 111/140] Revert JDK-8265245: depChecker_ don't have any - functionalities - ---- - src/hotspot/cpu/riscv/depChecker_riscv.hpp | 32 ++++++++++++++++++++++ - 1 file changed, 32 insertions(+) - create mode 100644 src/hotspot/cpu/riscv/depChecker_riscv.hpp - -diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp + if (expectedTestBitInstruction != null) { + output.shouldContain(expectedTestBitInstruction); +diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java new file mode 100644 -index 00000000000..e9ff307b647 +index 0000000000..5a1b659bbe --- /dev/null -+++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp -@@ -0,0 +1,32 @@ ++++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +@@ -0,0 +1,80 @@ +/* -+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Alibaba Group Holding Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -80560,1450 +56639,228 @@ index 00000000000..e9ff307b647 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. ++ */ ++ ++/* ++ * @test ++ * @summary Test libm intrinsics ++ * @library /test/lib / + * ++ * @build sun.hotspot.WhiteBox ++ * @run driver ClassFileInstaller sun.hotspot.WhiteBox ++ * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI ++ * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement ++ * compiler.floatingpoint.TestLibmIntrinsics + */ + -+#ifndef CPU_RISCV_VM_DEPCHECKER_RISCV_HPP -+#define CPU_RISCV_VM_DEPCHECKER_RISCV_HPP ++package compiler.floatingpoint; + -+// Nothing to do on riscv ++import compiler.whitebox.CompilerWhiteBoxTest; ++import sun.hotspot.WhiteBox; + -+#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP - -From 97a3d4d3b98a450aa316eaa94103cf8473d12d50 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 20:58:34 +0800 -Subject: [PATCH 112/140] Revert JDK-8241438: Move IntelJccErratum mitigation - code to platform-specific code - ---- - src/hotspot/cpu/riscv/riscv.ad | 18 ------------------ - 1 file changed, 18 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 7cd6c2995ba..fc6823daf8b 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -740,13 +740,6 @@ class HandlerImpl { - } - }; - --class Node::PD { --public: -- enum NodeFlags { -- _last_flag = Node::_last_flag -- }; --}; -- - bool is_CAS(int opcode, bool maybe_volatile); - - // predicate controlling translation of CompareAndSwapX -@@ -805,17 +798,6 @@ void reg_mask_init() { - } - } - --void PhaseOutput::pd_perform_mach_node_analysis() { --} -- --int MachNode::pd_alignment_required() const { -- return 1; --} -- --int MachNode::compute_padding(int current_offset) const { -- return 0; --} -- - // is_CAS(int opcode, bool maybe_volatile) - // - // return true if opcode is one of the possible CompareAndSwapX - -From 8a3e7b81b79918a4f2feb4d9226ab8be6c43c28a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:03:47 +0800 -Subject: [PATCH 113/140] Revert JDK-8260355: AArch64: deoptimization stub - should save vector registers - ---- - src/hotspot/cpu/riscv/registerMap_riscv.cpp | 45 --------------------- - src/hotspot/cpu/riscv/registerMap_riscv.hpp | 1 - - 2 files changed, 46 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.cpp - -diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.cpp b/src/hotspot/cpu/riscv/registerMap_riscv.cpp -deleted file mode 100644 -index 26c1edc36ff..00000000000 ---- a/src/hotspot/cpu/riscv/registerMap_riscv.cpp -+++ /dev/null -@@ -1,45 +0,0 @@ --/* -- * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#include "precompiled.hpp" --#include "runtime/registerMap.hpp" --#include "vmreg_riscv.inline.hpp" -- --address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const { -- if (base_reg->is_VectorRegister()) { -- assert(base_reg->is_concrete(), "must pass base reg"); -- int base_reg_enc = (base_reg->value() - ConcreteRegisterImpl::max_fpr) / -- VectorRegisterImpl::max_slots_per_register; -- intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size; -- address base_location = location(base_reg); -- if (base_location != NULL) { -- return base_location + offset_in_bytes; -- } else { -- return NULL; -- } -- } else { -- return location(base_reg->next(slot_idx)); -- } --} -diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp -index f34349811a9..fef8ca9b64e 100644 ---- a/src/hotspot/cpu/riscv/registerMap_riscv.hpp -+++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp -@@ -33,7 +33,6 @@ - // This is the hook for finding a register in an "well-known" location, - // such as a register block of a predetermined format. - address pd_location(VMReg reg) const { return NULL; } -- address pd_location(VMReg base_reg, int slot_idx) const; - - // no PD state to clear or copy: - void pd_clear() {} - -From 5fc20f93a312f9189b55c5236c15a55b3da10cf9 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:05:37 +0800 -Subject: [PATCH 114/140] Revert JDK-8250914: Matcher::stack_direction() is - unused - ---- - src/hotspot/cpu/riscv/riscv.ad | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index fc6823daf8b..c21508b6e7c 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2326,6 +2326,9 @@ encode %{ - // SP meets the minimum alignment. - - frame %{ -+ // What direction does stack grow in (assumed to be same for C & Java) -+ stack_direction(TOWARDS_LOW); ++import java.lang.reflect.Method; + - // These three registers define part of the calling convention - // between compiled code and the interpreter. 
- - -From aab3322fd2507a3aeae39c69ba871400dd342834 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:15:45 +0800 -Subject: [PATCH 115/140] Revert CacheWB*Node matching rules - ---- - src/hotspot/cpu/riscv/riscv.ad | 8 -------- - 1 file changed, 8 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index c21508b6e7c..e410bd06aa6 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1475,14 +1475,6 @@ const bool Matcher::match_rule_supported(int opcode) { - } - - switch (opcode) { -- case Op_CacheWB: // fall through -- case Op_CacheWBPreSync: // fall through -- case Op_CacheWBPostSync: -- if (!VM_Version::supports_data_cache_line_flush()) { -- return false; -- } -- break; -- - case Op_PopCountI: - case Op_PopCountL: - return UsePopCountInstruction; - -From 705981aaff19b442b55df8a038aab9c61133bc3a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:21:10 +0800 -Subject: [PATCH 116/140] Revert JDK-8263595: Remove oop type punning in - JavaCallArguments - ---- - src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -index bc4e5758256..df3c0267eea 100644 ---- a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -+++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -@@ -65,8 +65,9 @@ class JNITypes : private AllStatic { - } - - // Oops are stored in native format in one JavaCallArgument slot at *to. -- static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); } -- static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; } -+ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } -+ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } -+ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } - - // Floats are stored in native format in one JavaCallArgument slot at *to. - static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } - -From bba22725b9f1386d8899941ccee3e8dc7f9a4a6f Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:33:01 +0800 -Subject: [PATCH 117/140] Revert JDK-8260012: Reduce inclusion of - collectedHeap.hpp and heapInspection.hpp - ---- - src/hotspot/cpu/riscv/frame_riscv.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 40ec584b994..d4fcbdcbbde 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -598,7 +598,7 @@ BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) - oop* obj_p = (oop*)tos_addr; - obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; - } -- assert(Universe::is_in_heap_or_null(obj), "sanity check"); -+ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); - *oop_result = obj; - break; - } - -From 49000a43408aba29d3dc9ee4e03219e6f85be602 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:35:21 +0800 -Subject: [PATCH 118/140] Revert JDK-8271869: AArch64: build errors with GCC11 - in frame::saved_oop_result - ---- - src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -index 5ac1bf57f57..abd5bda7e49 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -@@ -230,8 +230,6 @@ inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { - - - // Compiled frames --PRAGMA_DIAG_PUSH --PRAGMA_NONNULL_IGNORED - inline oop frame::saved_oop_result(RegisterMap* map) const { - oop* result_adr = (oop *)map->location(x10->as_VMReg()); - guarantee(result_adr != NULL, "bad register save location"); -@@ -243,6 +241,5 @@ inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { - guarantee(result_adr != NULL, "bad register save location"); - *result_adr = obj; - } --PRAGMA_DIAG_POP - - #endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP - -From 14a46a85e65f6fec09ac566d49a6232216881adb Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:40:43 +0800 -Subject: [PATCH 119/140] Revert JDK-8230392: Define AArch64 as - MULTI_COPY_ATOMIC - ---- - src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -index ffd420da024..606f0fa0da3 100644 ---- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -33,10 +33,6 @@ const int StackAlignmentInBytes = 16; - // 32-bit integer argument values are extended to 64 bits. - const bool CCallingConventionRequiresIntsAsLongs = false; - --// RISCV has adopted a multicopy atomic model closely following --// that of ARMv8. --#define CPU_MULTI_COPY_ATOMIC -- - // To be safe, we deoptimize when we come across an access that needs - // patching. This is similar to what is done on aarch64. 
- #define DEOPTIMIZE_WHEN_PATCHING - -From 8740928267a831c62f1deb20c910e3c27716bc40 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:42:20 +0800 -Subject: [PATCH 120/140] Revert: JDK-8246689: Enable independent compressed - oops/class ptrs on Aarch64 JDK-8241825: Make compressed oops and compressed - class pointers independent (x86_64, PPC, S390) - ---- - src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -index 606f0fa0da3..acdf75d324e 100644 ---- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -41,6 +41,4 @@ const bool CCallingConventionRequiresIntsAsLongs = false; - - #define SUPPORT_RESERVED_STACK_AREA - --#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false -- - #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP - -From 94b40f4efccc19c8ac66eda6c57381a222b02d2d Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:50:49 +0800 -Subject: [PATCH 121/140] Revert JDK-8222637: Obsolete NeedsDeoptSuspend - ---- - src/hotspot/cpu/riscv/globals_riscv.hpp | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index b78f258a764..a838a377829 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -32,6 +32,8 @@ - // Sets the default values for platform dependent flags used by the runtime system. - // (see globals.hpp) - -+define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++public class TestLibmIntrinsics { + - define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks - define_pd_global(bool, TrapBasedNullChecks, false); - define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast - -From 09968c9fc102fd32bc628d3e6fd9d9adcbec4373 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:52:44 +0800 -Subject: [PATCH 122/140] Revert JDK-8220051: Remove global safepoint code - ---- - src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -index acdf75d324e..d6ce8da07b8 100644 ---- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -41,4 +41,6 @@ const bool CCallingConventionRequiresIntsAsLongs = false; - - #define SUPPORT_RESERVED_STACK_AREA - -+#define THREAD_LOCAL_POLL ++ private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); + - #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP - -From 2f4fb2b5ac420d456421592dc09b81244636ba4d Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 22:00:52 +0800 -Subject: [PATCH 123/140] Revert JDK-8272873: C2: Inlining should not depend on - absolute call site counts - ---- - src/hotspot/cpu/riscv/globals_riscv.hpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index a838a377829..b4f71c45ec1 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -41,6 +41,7 @@ define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs - define_pd_global(uintx, 
CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. - define_pd_global(intx, CodeEntryAlignment, 64); - define_pd_global(intx, OptoLoopAlignment, 16); -+define_pd_global(intx, InlineFrequencyCount, 100); - - #define DEFAULT_STACK_YELLOW_PAGES (2) - #define DEFAULT_STACK_RED_PAGES (1) - -From 2df3625eea16fc0d45c0e4cf12c9433f0ec070fd Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 22:02:13 +0800 -Subject: [PATCH 124/140] Revert JDK-8220049: Obsolete ThreadLocalHandshakes - ---- - src/hotspot/cpu/riscv/globals_riscv.hpp | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index b4f71c45ec1..b7d85373c4a 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -76,6 +76,8 @@ define_pd_global(bool, CompactStrings, true); - // Clear short arrays bigger than one word in an arch-specific way - define_pd_global(intx, InitArrayShortSize, BytesPerLong); - -+define_pd_global(bool, ThreadLocalHandshakes, true); ++ private static final double pi = 3.1415926; + - define_pd_global(intx, InlineSmallCode, 1000); - - #define ARCH_FLAGS(develop, \ - -From a875c4caa423dd727cea1c891b17f4ded97e57d1 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 22:04:32 +0800 -Subject: [PATCH 125/140] Revert: JDK-8243208: Clean up JVMFlag implementation - JDK-8236625: Remove writeable macro from JVM flags declaration - ---- - src/hotspot/cpu/riscv/globals_riscv.hpp | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index b7d85373c4a..0becd9efd35 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -82,9 +82,12 @@ define_pd_global(intx, InlineSmallCode, 1000); - - #define ARCH_FLAGS(develop, \ - product, \ -+ diagnostic, \ -+ experimental, \ - notproduct, \ - range, \ -- constraint) \ -+ constraint, \ -+ writeable) \ - \ - product(bool, NearCpool, true, \ - "constant pool is close to instructions") \ - -From 19a9e6e8c3dba77cf8be0f25b1aec394aeca0b25 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Wed, 10 May 2023 09:44:12 +0800 -Subject: [PATCH 126/140] Revert JDK-8213436: Obsolete UseMembar && - JDK-8188764: Obsolete AssumeMP and then remove all support for non-MP builds, - always enabled - ---- - src/hotspot/cpu/riscv/globals_riscv.hpp | 2 ++ - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 2 ++ - 2 files changed, 4 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index 0becd9efd35..e820898d87f 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -64,6 +64,8 @@ define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); - define_pd_global(bool, RewriteBytecodes, true); - define_pd_global(bool, RewriteFrequentPairs, true); - -+define_pd_global(bool, UseMembar, true); ++ private static final double expected = 2.5355263553695413; + - define_pd_global(bool, PreserveFramePointer, false); - - // GC Ergo Flags -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -index 50ee7edb708..f13e4269b77 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -139,6 +139,8 @@ void VM_Version::initialize() { - #endif // COMPILER2 - - 
UNSUPPORTED_OPTION(CriticalJNINatives); ++ static double m() { ++ return Math.pow(pi, Math.sin(Math.cos(Math.tan(Math.log(Math.log10(Math.exp(pi))))))); ++ } + -+ FLAG_SET_DEFAULT(UseMembar, true); - } - - #ifdef COMPILER2 - -From 0c4a9d1b6b3b3b31a1c105ff311414ae542764bb Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Mon, 1 May 2023 16:04:15 +0800 -Subject: [PATCH 127/140] Misc adaptations to jdk11u - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 2 +- - .../linux_riscv/vm_version_linux_riscv.cpp | 16 ++++++++-------- - 2 files changed, 9 insertions(+), 9 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index 25e00bea901..9316d4be02e 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -57,7 +57,7 @@ define_pd_global(uintx, CodeCacheMinBlockLength, 1); - define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); - define_pd_global(uintx, MetaspaceSize, 12*M ); - define_pd_global(bool, NeverActAsServerClassMachine, true ); --define_pd_global(uint64_t, MaxRAM, 1ULL*G); -+define_pd_global(uint64_t, MaxRAM, 1ULL*G); - define_pd_global(bool, CICompileOSR, true ); - #endif // !TIERED - define_pd_global(bool, UseTypeProfile, false); -diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp -index 4623dbfad42..60260854db6 100644 ---- a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp -@@ -83,14 +83,14 @@ void VM_Version::get_os_cpu_info() { - - uint64_t auxv = getauxval(AT_HWCAP); - -- static_assert(CPU_I == HWCAP_ISA_I, "Flag CPU_I must follow Linux HWCAP"); -- static_assert(CPU_M == HWCAP_ISA_M, "Flag CPU_M must follow Linux HWCAP"); -- static_assert(CPU_A == HWCAP_ISA_A, "Flag CPU_A must follow Linux HWCAP"); -- static_assert(CPU_F == HWCAP_ISA_F, "Flag CPU_F must follow Linux HWCAP"); -- static_assert(CPU_D == HWCAP_ISA_D, "Flag CPU_D must follow Linux HWCAP"); -- static_assert(CPU_C == HWCAP_ISA_C, "Flag CPU_C must follow Linux HWCAP"); -- static_assert(CPU_V == HWCAP_ISA_V, "Flag CPU_V must follow Linux HWCAP"); -- static_assert(CPU_B == HWCAP_ISA_B, "Flag CPU_B must follow Linux HWCAP"); -+ STATIC_ASSERT(CPU_I == HWCAP_ISA_I); -+ STATIC_ASSERT(CPU_M == HWCAP_ISA_M); -+ STATIC_ASSERT(CPU_A == HWCAP_ISA_A); -+ STATIC_ASSERT(CPU_F == HWCAP_ISA_F); -+ STATIC_ASSERT(CPU_D == HWCAP_ISA_D); -+ STATIC_ASSERT(CPU_C == HWCAP_ISA_C); -+ STATIC_ASSERT(CPU_V == HWCAP_ISA_V); -+ STATIC_ASSERT(CPU_B == HWCAP_ISA_B); - _features = auxv & ( - HWCAP_ISA_I | - HWCAP_ISA_M | - -From 4ce5e05526029360ad15eb9639c9c05fac77ac8e Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 20 May 2023 17:51:52 +0800 -Subject: [PATCH 128/140] Save all call-clobbered registers for spark tests may - crash - ---- - .../cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 10 ++-------- - 1 file changed, 2 insertions(+), 8 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -index bc847388f68..e191cbcee2a 100644 ---- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -157,21 +157,15 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, - __ j(done); - - __ bind(runtime); -- // save the live input values -- RegSet saved = RegSet::of(pre_val); -- if 
(tosca_live) { saved += RegSet::of(x10); } -- if (obj != noreg) { saved += RegSet::of(obj); } -- -- __ push_reg(saved, sp); - -+ __ push_call_clobbered_registers(); - if (expand_call) { - assert(pre_val != c_rarg1, "smashed arg"); - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); - } else { - __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); - } -- -- __ pop_reg(saved, sp); -+ __ pop_call_clobbered_registers(); - - __ bind(done); - - -From 1b8778b0831571e9ac688bbd22afca4cf8f62407 Mon Sep 17 00:00:00 2001 -From: Kuai Wei -Date: Tue, 22 Aug 2023 16:17:31 +0800 -Subject: [PATCH 129/140] Build with gcc 13 - ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 1 + - src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp | 1 + - 2 files changed, 2 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 37ccf132986..fd18bb77058 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -49,6 +49,7 @@ - #include "runtime/thread.hpp" - #ifdef COMPILER2 - #include "opto/compile.hpp" -+#include "opto/intrinsicnode.hpp" - #include "opto/node.hpp" - #include "opto/output.hpp" - #endif -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -index 31d9254d8ad..ccceed643ed 100644 ---- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -@@ -24,6 +24,7 @@ - */ - - #include "precompiled.hpp" -+#include "memory/metaspaceShared.hpp" - #include "runtime/frame.inline.hpp" - #include "runtime/thread.inline.hpp" - - -From 4c23be6665aec94462e82e3b4adcf7abb5b23981 Mon Sep 17 00:00:00 2001 -From: Kuai Wei -Date: Tue, 5 Sep 2023 15:37:43 +0800 -Subject: [PATCH 130/140] Fix copyright information - ---- - make/autoconf/build-aux/config.guess | 2 +- - .../MyPackage/HeapMonitorEventsForTwoThreadsTest.java | 1 + - 2 files changed, 2 insertions(+), 1 deletion(-) - -diff --git a/make/autoconf/build-aux/config.guess b/make/autoconf/build-aux/config.guess -index 15111d827ab..a88a9adec3f 100644 ---- a/make/autoconf/build-aux/config.guess -+++ b/make/autoconf/build-aux/config.guess -@@ -1,6 +1,6 @@ - #!/bin/sh - # --# Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. -+# Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights reserved. - # Copyright (c) 2021, Azul Systems, Inc. All rights reserved. - # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- # -diff --git a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java -index f0b7aed5ceb..54640b245f8 100644 ---- a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java -+++ b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java -@@ -1,4 +1,5 @@ ++ static public void main(String[] args) throws NoSuchMethodException { ++ Method test_method = compiler.floatingpoint.TestLibmIntrinsics.class.getDeclaredMethod("m"); ++ ++ double interpreter_result = m(); ++ ++ // Compile with C1 if possible ++ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE); ++ ++ double c1_result = m(); ++ ++ WHITE_BOX.deoptimizeMethod(test_method); ++ ++ // Compile it with C2 if possible ++ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION); ++ ++ double c2_result = m(); ++ ++ if (interpreter_result != c1_result || ++ interpreter_result != c2_result || ++ c1_result != c2_result) { ++ System.out.println("interpreter = " + interpreter_result + " c1 = " + c1_result + " c2 = " + c2_result); ++ throw new RuntimeException("Test Failed"); ++ } ++ } ++} +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java +index 558b4218f0..55374b116e 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java +@@ -1,5 +1,5 @@ /* -+ * Copyright (c) 2018, Google and/or its affiliates. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it - -From 70a060f73c3617e58f881bcee19f1a3ce43f54ff Mon Sep 17 00:00:00 2001 -From: Chris Plummer -Date: Thu, 2 Jul 2020 13:13:10 -0700 -Subject: [PATCH 131/140] 8247533: SA stack walking sometimes fails with - sun.jvm.hotspot.debugger.DebuggerException: get_thread_regs failed for a lwp - -Reviewed-by: sspitsyn, ysuenaga, dtitov ---- - .../native/libsaproc/LinuxDebuggerLocal.c | 8 ++++++- - .../linux/native/libsaproc/ps_proc.c | 3 ++- - .../native/libsaproc/MacosxDebuggerLocal.m | 24 ++++++++++++------- - .../debugger/bsd/BsdDebuggerLocal.java | 2 +- - .../jvm/hotspot/debugger/bsd/BsdThread.java | 10 +++++--- - .../debugger/linux/LinuxDebuggerLocal.java | 2 +- - .../hotspot/debugger/linux/LinuxThread.java | 10 +++++--- - .../windbg/amd64/WindbgAMD64Thread.java | 15 ++++++++---- - .../windows/native/libsaproc/sawindbg.cpp | 14 ++++++++--- - 9 files changed, 61 insertions(+), 27 deletions(-) - -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -index 45a927fb5ee..6f1887f8113 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -@@ -413,7 +413,13 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - - struct ps_prochandle* ph = get_proc_handle(env, this_obj); - if (get_lwp_regs(ph, lwp_id, &gregs) != true) { -- THROW_NEW_DEBUGGER_EXCEPTION_("get_thread_regs failed for a lwp", 0); -+ // This is not considered fatal and does happen on occassion, usually with an -+ // ESRCH error. The root cause is not fully understood, but by ignoring this error -+ // and returning NULL, stacking walking code will get null registers and fallback -+ // to using the "last java frame" if setup. -+ fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: get_lwp_regs failed for lwp (%d)\n", lwp_id); -+ fflush(stdout); -+ return NULL; - } - - #undef NPRGREG -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -index de5254d859e..691c3f6684a 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -@@ -144,7 +144,8 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - - #ifdef PTRACE_GETREGS_REQ - if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { -- print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp %d\n", pid); -+ print_debug("ptrace(PTRACE_GETREGS, ...) 
failed for lwp(%d) errno(%d) \"%s\"\n", pid, -+ errno, strerror(errno)); - return false; - } - return true; -diff --git a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m -index 18b8b4282fe..e46370a1f18 100644 ---- a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m -+++ b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m -@@ -685,7 +685,7 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo - JNIEnv *env, jobject this_obj, - jlong thread_id) - { -- print_debug("getThreadRegisterSet0 called\n"); -+ print_debug("getThreadIntegerRegisterSet0 called\n"); - - struct ps_prochandle* ph = get_proc_handle(env, this_obj); - if (ph != NULL && ph->core != NULL) { -@@ -705,7 +705,13 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo - result = thread_get_state(tid, HSDB_THREAD_STATE, (thread_state_t)&state, &count); - - if (result != KERN_SUCCESS) { -- print_error("getregs: thread_get_state(%d) failed (%d)\n", tid, result); -+ // This is not considered fatal. Unlike on Linux and Windows, we haven't seen a -+ // failure to get thread registers, but if it were to fail the response should -+ // be the same. By ignoring this error and returning NULL, stacking walking code -+ // will get null registers and fallback to using the "last java frame" if setup. -+ fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: thread_get_state failed (%d) for thread (%d)\n", -+ result, tid); -+ fflush(stdout); - return NULL; - } - -@@ -808,25 +814,25 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo - */ - JNIEXPORT jint JNICALL - Java_sun_jvm_hotspot_debugger_macosx_MacOSXDebuggerLocal_translateTID0( -- JNIEnv *env, jobject this_obj, jint tid) -+ JNIEnv *env, jobject this_obj, jint tid) - { - print_debug("translateTID0 called on tid = 0x%x\n", (int)tid); - - kern_return_t result; - thread_t foreign_tid, usable_tid; - mach_msg_type_name_t type; -- -+ - foreign_tid = tid; -- -+ - task_t gTask = getTask(env, this_obj); -- result = mach_port_extract_right(gTask, foreign_tid, -- MACH_MSG_TYPE_COPY_SEND, -+ result = mach_port_extract_right(gTask, foreign_tid, -+ MACH_MSG_TYPE_COPY_SEND, - &usable_tid, &type); - if (result != KERN_SUCCESS) - return -1; -- -+ - print_debug("translateTID0: 0x%x -> 0x%x\n", foreign_tid, usable_tid); -- -+ - return (jint) usable_tid; - } +@@ -42,6 +42,7 @@ package compiler.intrinsics.sha.cli; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java -index 655b450c3fc..d0557a7d254 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java -@@ -166,7 +166,7 @@ public WorkerThreadTask 
execute(WorkerThreadTask task) throws DebuggerException - } catch (InterruptedException x) {} - } - if (lastException != null) { -- throw new DebuggerException(lastException); -+ throw new DebuggerException(lastException.getMessage(), lastException); - } else { - return task; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java -index 0d637f30f14..c52d3a51d54 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java +@@ -54,6 +55,8 @@ public class TestUseSHA1IntrinsicsOptionOnUnsupportedCPU { + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java +index 3ed72bf0a9..8fb82ee453 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* -- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -67,8 +67,12 @@ public String toString() { - public ThreadContext getContext() throws IllegalThreadStateException { - long[] data = debugger.getThreadIntegerRegisterSet(unique_thread_id); - ThreadContext context = BsdThreadContextFactory.createThreadContext(debugger); -- for (int i = 0; i < data.length; i++) { -- context.setRegister(i, data[i]); -+ // null means we failed to get the register set for some reason. The caller -+ // is responsible for dealing with the set of null registers in that case. 
-+ if (data != null) { -+ for (int i = 0; i < data.length; i++) { -+ context.setRegister(i, data[i]); -+ } - } - return context; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java -index cb6712b58ee..6a0648f508a 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java -@@ -173,7 +173,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException - } catch (InterruptedException x) {} - } - if (lastException != null) { -- throw new DebuggerException(lastException); -+ throw new DebuggerException(lastException.getMessage(), lastException); - } else { - return task; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java -index 52307b9cdcf..3fe795d34bc 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +@@ -42,6 +42,7 @@ package compiler.intrinsics.sha.cli; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; + +@@ -54,6 +55,8 @@ public class TestUseSHA256IntrinsicsOptionOnUnsupportedCPU { + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java +index c05cf309da..aca32137ed 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* -- * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -73,8 +73,12 @@ public String toString() { - public ThreadContext getContext() throws IllegalThreadStateException { - long[] data = debugger.getThreadIntegerRegisterSet(lwp_id); - ThreadContext context = LinuxThreadContextFactory.createThreadContext(debugger); -- for (int i = 0; i < data.length; i++) { -- context.setRegister(i, data[i]); -+ // null means we failed to get the register set for some reason. The caller -+ // is responsible for dealing with the set of null registers in that case. -+ if (data != null) { -+ for (int i = 0; i < data.length; i++) { -+ context.setRegister(i, data[i]); -+ } - } - return context; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java -index ec5aea35e8c..377650a0a1c 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java +@@ -42,6 +42,7 @@ package compiler.intrinsics.sha.cli; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; + +@@ -54,6 +55,8 @@ public class TestUseSHA512IntrinsicsOptionOnUnsupportedCPU { + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java +index 58ce5366ba..8deac4f789 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* -- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -30,9 +30,9 @@ - - class WindbgAMD64Thread implements ThreadProxy { - private WindbgDebugger debugger; -- private long sysId; -+ private long sysId; // SystemID for Windows thread, stored in OSThread::_thread_id - private boolean gotID; -- private long id; -+ private long id; // ThreadID for Windows thread, returned by GetThreadIdBySystemId - - // The address argument must be the address of the OSThread::_thread_id - WindbgAMD64Thread(WindbgDebugger debugger, Address addr) { -@@ -50,8 +50,12 @@ class WindbgAMD64Thread implements ThreadProxy { - public ThreadContext getContext() throws IllegalThreadStateException { - long[] data = debugger.getThreadIntegerRegisterSet(getThreadID()); - WindbgAMD64ThreadContext context = new WindbgAMD64ThreadContext(debugger); -- for (int i = 0; i < data.length; i++) { -- context.setRegister(i, data[i]); -+ // null means we failed to get the register set for some reason. The caller -+ // is responsible for dealing with the set of null registers in that case. -+ if (data != null) { -+ for (int i = 0; i < data.length; i++) { -+ context.setRegister(i, data[i]); -+ } - } - return context; - } -@@ -86,6 +90,7 @@ public String toString() { - private long getThreadID() { - if (!gotID) { - id = debugger.getThreadIdFromSysId(sysId); -+ gotID = true; - } - - return id; -diff --git a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp -index 314cf69c957..e3b218b4dae 100644 ---- a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp -+++ b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp -@@ -45,6 +45,7 @@ - - #include - #include -+#include - - #define DEBUG_NO_IMPLEMENTATION - #include -@@ -765,9 +766,16 @@ JNIEXPORT jlong JNICALL Java_sun_jvm_hotspot_debugger_windbg_WindbgDebuggerLocal - CHECK_EXCEPTION_(0); - - ULONG id = 0; -- COM_VERIFY_OK_(ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id), -- "Windbg Error: GetThreadIdBySystemId failed!", 0); -- -+ HRESULT hr = ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id); -+ if (hr != S_OK) { -+ // This is not considered fatal and does happen on occassion, usually with an -+ // 0x80004002 "No such interface supported". The root cause is not fully understood, -+ // but by ignoring this error and returning NULL, stacking walking code will get -+ // null registers and fallback to using the "last java frame" if setup. 
-+ printf("WARNING: GetThreadIdBySystemId failed with 0x%x for sysId (%" PRIu64 ")\n", -+ hr, sysId); -+ return -1; -+ } - return (jlong) id; - } - - -From 2cadd133d25e05be6ab9b16024a37bed79af1f15 Mon Sep 17 00:00:00 2001 -From: Xiaolin Zheng -Date: Wed, 30 Mar 2022 09:04:55 +0000 -Subject: [PATCH 132/140] 8283737: riscv: MacroAssembler::stop() should emit - fixed-length instruction sequence - -Reviewed-by: fyang, shade ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index fd18bb77058..b72a553da2f 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -542,8 +542,11 @@ void MacroAssembler::resolve_jobject(Register value, Register thread, Register t - void MacroAssembler::stop(const char* msg) { - address ip = pc(); - pusha(); -- li(c_rarg0, (uintptr_t)(address)msg); -- li(c_rarg1, (uintptr_t)(address)ip); -+ // The length of the instruction sequence emitted should be independent -+ // of the values of msg and ip so that the size of mach nodes for scratch -+ // emit and normal emit matches. -+ mv(c_rarg0, (address)msg); -+ mv(c_rarg1, (address)ip); - mv(c_rarg2, sp); - mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); - jalr(c_rarg3); - -From 729e0db14cb320aedf1f12051e667513bddbb8e8 Mon Sep 17 00:00:00 2001 -From: Xiaolin Zheng -Date: Sun, 24 Apr 2022 02:17:03 +0000 -Subject: [PATCH 133/140] 8285437: riscv: Fix MachNode size mismatch for - MacroAssembler::verify_oops* - -Reviewed-by: shade, fyang ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index b72a553da2f..9f80f7e2650 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -389,7 +389,10 @@ void MacroAssembler::verify_oop(Register reg, const char* s) { - push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); - - mv(c_rarg0, reg); // c_rarg0 : x10 -- li(t0, (uintptr_t)(address)b); -+ // The length of the instruction sequence emitted should be independent -+ // of the values of the local char buffer address so that the size of mach -+ // nodes for scratch emit and normal emit matches. -+ mv(t0, (address)b); - - // call indirectly to solve generation ordering problem - int32_t offset = 0; -@@ -425,7 +428,10 @@ void MacroAssembler::verify_oop_addr(Address addr, const char* s) { - ld(x10, addr); - } - -- li(t0, (uintptr_t)(address)b); -+ // The length of the instruction sequence emitted should be independent -+ // of the values of the local char buffer address so that the size of mach -+ // nodes for scratch emit and normal emit matches. 
-+ mv(t0, (address)b); - - // call indirectly to solve generation ordering problem - int32_t offset = 0; - -From 5cab06c6f09f4b62d54d8d291b1a23f796a085c1 Mon Sep 17 00:00:00 2001 -From: Xiaolin Zheng -Date: Mon, 30 May 2022 07:45:50 +0000 -Subject: [PATCH 134/140] 8287418: riscv: Fix correctness issue of - MacroAssembler::movptr - -Reviewed-by: fjiang, yadongwang, fyang ---- - src/hotspot/cpu/riscv/assembler_riscv.cpp | 14 +++++++------- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 18 +++++++++--------- - src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 3 ++- - src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 2 +- - 4 files changed, 19 insertions(+), 18 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp -index f15ef5304c5..a5f688cda1f 100644 ---- a/src/hotspot/cpu/riscv/assembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp -@@ -282,9 +282,9 @@ void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { - } - #endif - assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (uintptr_t)-1), -- "48-bit overflow in address constant"); -- // Load upper 32 bits -- int32_t imm = imm64 >> 16; -+ "bit 47 overflows in address constant"); -+ // Load upper 31 bits -+ int32_t imm = imm64 >> 17; - int64_t upper = imm, lower = imm; - lower = (lower << 52) >> 52; - upper -= lower; -@@ -292,13 +292,13 @@ void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { - lui(Rd, upper); - addi(Rd, Rd, lower); - -- // Load the rest 16 bits. -+ // Load the rest 17 bits. - slli(Rd, Rd, 11); -- addi(Rd, Rd, (imm64 >> 5) & 0x7ff); -- slli(Rd, Rd, 5); -+ addi(Rd, Rd, (imm64 >> 6) & 0x7ff); -+ slli(Rd, Rd, 6); - - // This offset will be used by following jalr/ld. -- offset = imm64 & 0x1f; -+ offset = imm64 & 0x3f; - } - - void Assembler::movptr(Register Rd, uintptr_t imm64) { -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 9f80f7e2650..f592d7585da 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1158,12 +1158,12 @@ static int patch_offset_in_pc_relative(address branch, int64_t offset) { - - static int patch_addr_in_movptr(address branch, address target) { - const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load -- int32_t lower = ((intptr_t)target << 36) >> 36; -- int64_t upper = ((intptr_t)target - lower) >> 28; -- Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[47:28] + target[27] ==> branch[31:12] -- Assembler::patch(branch + 4, 31, 20, (lower >> 16) & 0xfff); // Addi. target[27:16] ==> branch[31:20] -- Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff); // Addi. target[15: 5] ==> branch[31:20] -- Assembler::patch(branch + 20, 31, 20, lower & 0x1f); // Addi/Jalr/Load. target[ 4: 0] ==> branch[31:20] -+ int32_t lower = ((intptr_t)target << 35) >> 35; -+ int64_t upper = ((intptr_t)target - lower) >> 29; -+ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12] -+ Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20] -+ Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20] -+ Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. 
target[ 5: 0] ==> branch[31:20] - return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; - } - -@@ -1235,9 +1235,9 @@ static long get_offset_of_pc_relative(address insn_addr) { - - static address get_target_of_movptr(address insn_addr) { - assert_cond(insn_addr != NULL); -- intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28; // Lui. -- target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16; // Addi. -- target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5; // Addi. -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 29; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 17; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 6; // Addi. - target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. - return (address) target_address; - } -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 45ffc663963..792c1fc2103 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -827,7 +827,8 @@ class MacroAssembler: public Assembler { - - // Return true if an address is within the 48-bit RISCV64 address space. - bool is_valid_riscv64_address(address addr) { -- return ((uintptr_t)addr >> 48) == 0; -+ // sv48: must have bits 63–48 all equal to bit 47 -+ return ((uintptr_t)addr >> 47) == 0; - } - - void ld_constant(Register dest, const Address &const_addr) { -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -index bfe84fa4e30..27011ad1283 100644 ---- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -@@ -89,7 +89,7 @@ bool NativeInstruction::is_movptr_at(address instr) { - is_addi_at(instr + instruction_size) && // Addi - is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 - is_addi_at(instr + instruction_size * 3) && // Addi -- is_slli_shift_at(instr + instruction_size * 4, 5) && // Slli Rd, Rs, 5 -+ is_slli_shift_at(instr + instruction_size * 4, 6) && // Slli Rd, Rs, 6 - (is_addi_at(instr + instruction_size * 5) || - is_jalr_at(instr + instruction_size * 5) || - is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load - -From 41d73298bf28473b3ba2483e61a39c188eddfde3 Mon Sep 17 00:00:00 2001 -From: Kuai Wei -Date: Fri, 22 Sep 2023 16:57:56 +0800 -Subject: [PATCH 135/140] Fix: Fixed-length mv() mistakenly redirected to li() - during reshaping - ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 6 ++++++ - src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 3 +-- - 2 files changed, 7 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index f592d7585da..f851cc1e413 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1343,6 +1343,12 @@ void MacroAssembler::mv(Register Rd, Address dest) { - movptr(Rd, dest.target()); - } - -+void MacroAssembler::mv(Register Rd, address addr) { -+ // Here in case of use with relocation, use fix length instruction -+ // movptr instead of li -+ movptr(Rd, addr); -+} -+ - void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { - if 
(src.is_register()) { - mv(Rd, src.as_register()); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 792c1fc2103..65f91532661 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -540,8 +540,6 @@ class MacroAssembler: public Assembler { - } - - // mv -- void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } -- - inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } - inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } - inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } -@@ -552,6 +550,7 @@ class MacroAssembler: public Assembler { - inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } - - void mv(Register Rd, Address dest); -+ void mv(Register Rd, address dest); - void mv(Register Rd, RegisterOrConstant src); - - // logic - -From 26f4b26a98507ec03a2329bfcbaab393247fe83f Mon Sep 17 00:00:00 2001 -From: Xiaolin Zheng -Date: Fri, 2 Sep 2022 07:01:02 +0000 -Subject: [PATCH 136/140] 8293100: RISC-V: Need to save and restore - callee-saved FloatRegisters in StubGenerator::generate_call_stub - -Reviewed-by: yadongwang, fjiang, shade, vkempik ---- - src/hotspot/cpu/riscv/frame_riscv.hpp | 2 +- - src/hotspot/cpu/riscv/riscv.ad | 18 ++--- - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 74 +++++++++++++++++-- - src/hotspot/cpu/riscv/vmreg_riscv.cpp | 2 +- - 4 files changed, 80 insertions(+), 16 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp -index 3b88f6d5a1a..18e021dcb94 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.hpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.hpp -@@ -131,7 +131,7 @@ - // Entry frames - // n.b. 
these values are determined by the layout defined in - // stubGenerator for the Java call stub -- entry_frame_after_call_words = 22, -+ entry_frame_after_call_words = 34, - entry_frame_call_wrapper_offset = -10, - - // we don't need a save area -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index e410bd06aa6..69696b272a5 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -8601,7 +8601,7 @@ instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST); -- format %{ "float_b$cmp $op1, $op2 \t#@cmpF_branch"%} -+ format %{ "float_b$cmp $op1, $op2, $lbl \t#@cmpF_branch"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); -@@ -8618,7 +8618,7 @@ instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST); -- format %{ "float_b$cmp $op1, $op2\t#@cmpF_loop"%} -+ format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); -@@ -8636,7 +8636,7 @@ instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST); -- format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%} -+ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -@@ -8654,7 +8654,7 @@ instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST); -- format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%} -+ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -@@ -8929,7 +8929,7 @@ instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{ - effect(USE lbl); - - ins_cost(BRANCH_COST); -- format %{ "far_b$cmp $cr, zr, L\t#@far_cmpFlag_branch"%} -+ format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%} - - ins_encode %{ - __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); -@@ -9138,7 +9138,7 @@ instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST * 2); -- format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_branch"%} -+ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -@@ -9154,7 +9154,7 @@ instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST * 2); -- format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_loop"%} -+ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -@@ -9171,7 +9171,7 @@ instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST * 2); -- format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%} -+ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, 
as_FloatRegister($op1$$reg), -@@ -9187,7 +9187,7 @@ instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST * 2); -- format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%} -+ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -index 74c38c3d044..9970229c5c5 100644 ---- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -118,16 +118,28 @@ class StubGenerator: public StubCodeGenerator { - // we don't need to save x6-x7 and x28-x31 which both C and Java treat as - // volatile - // -- // we save x18-x27 which Java uses as temporary registers and C -- // expects to be callee-save -+ // we save x9, x18-x27, f8-f9, and f18-f27 which Java uses as temporary -+ // registers and C expects to be callee-save - // - // so the stub frame looks like this when we enter Java code - // - // [ return_from_Java ] <--- sp - // [ argument word n ] - // ... -- // -22 [ argument word 1 ] -- // -21 [ saved x27 ] <--- sp_after_call -+ // -34 [ argument word 1 ] -+ // -33 [ saved f27 ] <--- sp_after_call -+ // -32 [ saved f26 ] -+ // -31 [ saved f25 ] -+ // -30 [ saved f24 ] -+ // -29 [ saved f23 ] -+ // -28 [ saved f22 ] -+ // -27 [ saved f21 ] -+ // -26 [ saved f20 ] -+ // -25 [ saved f19 ] -+ // -24 [ saved f18 ] -+ // -23 [ saved f9 ] -+ // -22 [ saved f8 ] -+ // -21 [ saved x27 ] - // -20 [ saved x26 ] - // -19 [ saved x25 ] - // -18 [ saved x24 ] -@@ -152,7 +164,20 @@ class StubGenerator: public StubCodeGenerator { - - // Call stub stack layout word offsets from fp - enum call_stub_layout { -- sp_after_call_off = -21, -+ sp_after_call_off = -33, -+ -+ f27_off = -33, -+ f26_off = -32, -+ f25_off = -31, -+ f24_off = -30, -+ f23_off = -29, -+ f22_off = -28, -+ f21_off = -27, -+ f20_off = -26, -+ f19_off = -25, -+ f18_off = -24, -+ f9_off = -23, -+ f8_off = -22, - - x27_off = -21, - x26_off = -20, -@@ -198,6 +223,19 @@ class StubGenerator: public StubCodeGenerator { - - const Address thread (fp, thread_off * wordSize); - -+ const Address f27_save (fp, f27_off * wordSize); -+ const Address f26_save (fp, f26_off * wordSize); -+ const Address f25_save (fp, f25_off * wordSize); -+ const Address f24_save (fp, f24_off * wordSize); -+ const Address f23_save (fp, f23_off * wordSize); -+ const Address f22_save (fp, f22_off * wordSize); -+ const Address f21_save (fp, f21_off * wordSize); -+ const Address f20_save (fp, f20_off * wordSize); -+ const Address f19_save (fp, f19_off * wordSize); -+ const Address f18_save (fp, f18_off * wordSize); -+ const Address f9_save (fp, f9_off * wordSize); -+ const Address f8_save (fp, f8_off * wordSize); -+ - const Address x27_save (fp, x27_off * wordSize); - const Address x26_save (fp, x26_off * wordSize); - const Address x25_save (fp, x25_off * wordSize); -@@ -244,6 +282,19 @@ class StubGenerator: public StubCodeGenerator { - __ sd(x26, x26_save); - __ sd(x27, x27_save); +@@ -41,6 +41,7 @@ package compiler.intrinsics.sha.cli; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import 
compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU; -+ __ fsd(f8, f8_save); -+ __ fsd(f9, f9_save); -+ __ fsd(f18, f18_save); -+ __ fsd(f19, f19_save); -+ __ fsd(f20, f20_save); -+ __ fsd(f21, f21_save); -+ __ fsd(f22, f22_save); -+ __ fsd(f23, f23_save); -+ __ fsd(f24, f24_save); -+ __ fsd(f25, f25_save); -+ __ fsd(f26, f26_save); -+ __ fsd(f27, f27_save); -+ - // install Java thread in global register now we have saved - // whatever value it held - __ mv(xthread, c_rarg7); -@@ -335,6 +386,19 @@ class StubGenerator: public StubCodeGenerator { - #endif +@@ -53,6 +54,8 @@ public class TestUseSHAOptionOnUnsupportedCPU { + SHAOptionsBase.USE_SHA_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA_OPTION), + new UseSHASpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +index faa9fdbae6..2663500204 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -32,26 +32,27 @@ import jdk.test.lib.cli.predicate.OrPredicate; - // restore callee-save registers -+ __ fld(f27, f27_save); -+ __ fld(f26, f26_save); -+ __ fld(f25, f25_save); -+ __ fld(f24, f24_save); -+ __ fld(f23, f23_save); -+ __ fld(f22, f22_save); -+ __ fld(f21, f21_save); -+ __ fld(f20, f20_save); -+ __ fld(f19, f19_save); -+ __ fld(f18, f18_save); -+ __ fld(f9, f9_save); -+ __ fld(f8, f8_save); -+ - __ ld(x27, x27_save); - __ ld(x26, x26_save); - __ ld(x25, x25_save); -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -index 5d1187c2a27..c4338715f95 100644 ---- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -@@ -40,7 +40,7 @@ void VMRegImpl::set_regName() { - FloatRegister freg = ::as_FloatRegister(0); - for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { - for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { -- regName[i++] = reg->name(); -+ regName[i++] = freg->name(); + /** + * Generic test case for SHA-related options targeted to any CPU except +- * AArch64, PPC, S390x, SPARC and X86. ++ * AArch64, RISCV64, PPC, S390x, SPARC and X86. + */ + public class GenericTestCaseForOtherCPU extends + SHAOptionsBase.TestCase { + public GenericTestCaseForOtherCPU(String optionName) { +- // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC and X86. ++ // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, SPARC and X86. 
+ super(optionName, new NotPredicate( + new OrPredicate(Platform::isAArch64, ++ new OrPredicate(Platform::isRISCV64, + new OrPredicate(Platform::isS390x, + new OrPredicate(Platform::isSparc, + new OrPredicate(Platform::isPPC, + new OrPredicate(Platform::isX64, +- Platform::isX86))))))); ++ Platform::isX86)))))))); } - freg = freg->successor(); - } - -From 69ea557c320ad7b2f35fc0e986af9b485f95addf Mon Sep 17 00:00:00 2001 -From: Xiaolin Zheng -Date: Fri, 28 Oct 2022 11:56:21 +0000 -Subject: [PATCH 137/140] 8295926: RISC-V: C1: Fix - LIRGenerator::do_LibmIntrinsic - -Reviewed-by: yadongwang, fyang ---- - .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 21 +++-- - .../floatingpoint/TestLibmIntrinsics.java | 80 +++++++++++++++++++ - 2 files changed, 96 insertions(+), 5 deletions(-) - create mode 100644 test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java - -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index f9242251491..c41819fc2ae 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -679,19 +679,30 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { - void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { - LIRItem value(x->argument_at(0), this); - value.set_destroys_register(); -+ - LIR_Opr calc_result = rlock_result(x); - LIR_Opr result_reg = result_register_for(x->type()); -+ - CallingConvention* cc = NULL; -- BasicTypeList signature(1); -- signature.append(T_DOUBLE); -- if (x->id() == vmIntrinsics::_dpow) { signature.append(T_DOUBLE); } -- cc = frame_map()->c_calling_convention(&signature); -- value.load_item_force(cc->at(0)); -+ - if (x->id() == vmIntrinsics::_dpow) { - LIRItem value1(x->argument_at(1), this); -+ - value1.set_destroys_register(); -+ -+ BasicTypeList signature(2); -+ signature.append(T_DOUBLE); -+ signature.append(T_DOUBLE); -+ cc = frame_map()->c_calling_convention(&signature); -+ value.load_item_force(cc->at(0)); - value1.load_item_force(cc->at(1)); -+ } else { -+ BasicTypeList signature(1); -+ signature.append(T_DOUBLE); -+ cc = frame_map()->c_calling_convention(&signature); -+ value.load_item_force(cc->at(0)); - } -+ - switch (x->id()) { - case vmIntrinsics::_dexp: - if (StubRoutines::dexp() != NULL) { __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); } -diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java + + @Override + protected void verifyWarnings() throws Throwable { + String shouldPassMessage = String.format("JVM should start with " + + "option '%s' without any warnings", optionName); +- // Verify that on non-x86, non-SPARC and non-AArch64 CPU usage of ++ // Verify that on non-x86, non-RISCV64, non-SPARC and non-AArch64 CPU usage of + // SHA-related options will not cause any warnings. 
+ CommandLineOptionTest.verifySameJVMStartup(null, + new String[] { ".*" + optionName + ".*" }, shouldPassMessage, +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java new file mode 100644 -index 00000000000..5c711efddea +index 0000000000..8566d57c39 --- /dev/null -+++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java -@@ -0,0 +1,80 @@ ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java +@@ -0,0 +1,115 @@ +/* -+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Alibaba Group Holding Limited. All rights reserved. ++ * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -82025,422 +56882,462 @@ index 00000000000..5c711efddea + * questions. + */ + -+/* -+ * @test -+ * @summary Test libm intrinsics -+ * @library /test/lib / -+ * -+ * @build jdk.test.whitebox.WhiteBox -+ * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox -+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -+ * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement -+ * compiler.floatingpoint.TestLibmIntrinsics -+ */ -+ -+package compiler.floatingpoint; -+ -+import compiler.whitebox.CompilerWhiteBoxTest; -+import jdk.test.whitebox.WhiteBox; -+ -+import java.lang.reflect.Method; -+ -+public class TestLibmIntrinsics { ++package compiler.intrinsics.sha.cli.testcases; + -+ private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); ++import compiler.intrinsics.sha.cli.SHAOptionsBase; ++import jdk.test.lib.process.ExitCode; ++import jdk.test.lib.Platform; ++import jdk.test.lib.cli.CommandLineOptionTest; ++import jdk.test.lib.cli.predicate.AndPredicate; ++import jdk.test.lib.cli.predicate.NotPredicate; + -+ private static final double pi = 3.1415926; ++/** ++ * Generic test case for SHA-related options targeted to RISCV64 CPUs ++ * which don't support instruction required by the tested option. 
++ */ ++public class GenericTestCaseForUnsupportedRISCV64CPU extends ++ SHAOptionsBase.TestCase { + -+ private static final double expected = 2.5355263553695413; ++ final private boolean checkUseSHA; + -+ static double m() { -+ return Math.pow(pi, Math.sin(Math.cos(Math.tan(Math.log(Math.log10(Math.exp(pi))))))); ++ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { ++ this(optionName, true); + } + -+ static public void main(String[] args) throws NoSuchMethodException { -+ Method test_method = compiler.floatingpoint.TestLibmIntrinsics.class.getDeclaredMethod("m"); ++ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { ++ super(optionName, new AndPredicate(Platform::isRISCV64, ++ new NotPredicate(SHAOptionsBase.getPredicateForOption( ++ optionName)))); + -+ double interpreter_result = m(); ++ this.checkUseSHA = checkUseSHA; ++ } + -+ // Compile with C1 if possible -+ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE); ++ @Override ++ protected void verifyWarnings() throws Throwable { ++ String shouldPassMessage = String.format("JVM startup should pass with" ++ + "option '-XX:-%s' without any warnings", optionName); ++ //Verify that option could be disabled without any warnings. ++ CommandLineOptionTest.verifySameJVMStartup(null, new String[] { ++ SHAOptionsBase.getWarningForUnsupportedCPU(optionName) ++ }, shouldPassMessage, shouldPassMessage, ExitCode.OK, ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + -+ double c1_result = m(); ++ if (checkUseSHA) { ++ shouldPassMessage = String.format("If JVM is started with '-XX:-" ++ + "%s' '-XX:+%s', output should contain warning.", ++ SHAOptionsBase.USE_SHA_OPTION, optionName); + -+ WHITE_BOX.deoptimizeMethod(test_method); ++ // Verify that when the tested option is enabled, then ++ // a warning will occur in VM output if UseSHA is disabled. ++ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { ++ CommandLineOptionTest.verifySameJVMStartup( ++ new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, ++ null, ++ shouldPassMessage, ++ shouldPassMessage, ++ ExitCode.OK, ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), ++ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); ++ } ++ } ++ } + -+ // Compile it with C2 if possible -+ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION); ++ @Override ++ protected void verifyOptionValues() throws Throwable { ++ // Verify that option is disabled by default. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be disabled by default", ++ optionName), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); + -+ double c2_result = m(); ++ if (checkUseSHA) { ++ // Verify that option is disabled even if it was explicitly enabled ++ // using CLI options. 
++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be off on unsupported " ++ + "RISCV64CPU even if set to true directly", optionName), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + -+ if (interpreter_result != c1_result || -+ interpreter_result != c2_result || -+ c1_result != c2_result) { -+ System.out.println("interpreter = " + interpreter_result + " c1 = " + c1_result + " c2 = " + c2_result); -+ throw new RuntimeException("Test Failed"); ++ // Verify that option is disabled when +UseSHA was passed to JVM. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be off on unsupported " ++ + "RISCV64CPU even if %s flag set to JVM", ++ optionName, CommandLineOptionTest.prepareBooleanFlag( ++ SHAOptionsBase.USE_SHA_OPTION, true)), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag( ++ SHAOptionsBase.USE_SHA_OPTION, true)); + } + } +} - -From ec57f23aa4001315a030cacd55aa5ef7c3269fbb Mon Sep 17 00:00:00 2001 -From: Kuai Wei -Date: Mon, 9 Oct 2023 11:07:34 +0800 -Subject: [PATCH 138/140] Fix test error after port 8295926 - ---- - .../jtreg/compiler/floatingpoint/TestLibmIntrinsics.java | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java -index 5c711efddea..5a1b659bbe0 100644 ---- a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java -+++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java -@@ -27,8 +27,8 @@ - * @summary Test libm intrinsics - * @library /test/lib / +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +index 2e3e2717a6..7be8af6d03 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * -- * @build jdk.test.whitebox.WhiteBox -- * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox -+ * @build sun.hotspot.WhiteBox -+ * @run driver ClassFileInstaller sun.hotspot.WhiteBox - * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI - * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement - * compiler.floatingpoint.TestLibmIntrinsics -@@ -37,7 +37,7 @@ - package compiler.floatingpoint; - - import compiler.whitebox.CompilerWhiteBoxTest; --import jdk.test.whitebox.WhiteBox; -+import sun.hotspot.WhiteBox; - - import java.lang.reflect.Method; - - -From b115ec4381ad3ad8cbe9ca3d225cb438538916ac Mon Sep 17 00:00:00 2001 -From: Kuai Wei -Date: Tue, 17 Oct 2023 14:22:49 +0800 -Subject: [PATCH 139/140] Revert JDK-8247533: SA stack walking sometimes fails - with sun.jvm.hotspot.debugger.DebuggerException: get_thread_regs failed for a - lwp - ---- - .../native/libsaproc/LinuxDebuggerLocal.c | 8 +------ - .../linux/native/libsaproc/ps_proc.c | 3 +-- - .../native/libsaproc/MacosxDebuggerLocal.m | 24 +++++++------------ - .../debugger/bsd/BsdDebuggerLocal.java | 2 +- - .../jvm/hotspot/debugger/bsd/BsdThread.java | 10 +++----- - .../debugger/linux/LinuxDebuggerLocal.java | 2 +- - .../hotspot/debugger/linux/LinuxThread.java | 10 +++----- - .../windbg/amd64/WindbgAMD64Thread.java | 15 ++++-------- - .../windows/native/libsaproc/sawindbg.cpp | 14 +++-------- - 9 files changed, 27 insertions(+), 61 deletions(-) - -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -index 6f1887f8113..45a927fb5ee 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -@@ -413,13 +413,7 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - - struct ps_prochandle* ph = get_proc_handle(env, this_obj); - if (get_lwp_regs(ph, lwp_id, &gregs) != true) { -- // This is not considered fatal and does happen on occassion, usually with an -- // ESRCH error. The root cause is not fully understood, but by ignoring this error -- // and returning NULL, stacking walking code will get null registers and fallback -- // to using the "last java frame" if setup. 
-- fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: get_lwp_regs failed for lwp (%d)\n", lwp_id); -- fflush(stdout); -- return NULL; -+ THROW_NEW_DEBUGGER_EXCEPTION_("get_thread_regs failed for a lwp", 0); - } - - #undef NPRGREG -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -index 691c3f6684a..de5254d859e 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -@@ -144,8 +144,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +index 0e06a9e432..797927b42b 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +index c3cdbf3746..be8f7d586c 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +index d33bd411f1..d96d5e29c0 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions + * -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +index 992fa4b516..b09c873d05 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8138583 + * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : double abs & neg test +- * @requires os.arch=="aarch64" ++ * @requires os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +index 3e79b3528b..fe40ed6f98 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8138583 + * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : float abs & neg test +- * @requires os.arch=="aarch64" ++ * @requires os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +index 6603dd224e..5163191049 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8135028 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +index d9a0c98800..d999ae423c 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +index 722db95aed..65912a5c7f 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : float test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +index f58f21feb2..fffdc2f756 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : int test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +index acb86812d2..2c866f26f0 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +@@ -24,7 +24,7 @@ - #ifdef PTRACE_GETREGS_REQ - if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { -- print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp(%d) errno(%d) \"%s\"\n", pid, -- errno, strerror(errno)); -+ print_debug("ptrace(PTRACE_GETREGS, ...) 
failed for lwp %d\n", pid); - return false; - } - return true; -diff --git a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m -index e46370a1f18..18b8b4282fe 100644 ---- a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m -+++ b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m -@@ -685,7 +685,7 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo - JNIEnv *env, jobject this_obj, - jlong thread_id) - { -- print_debug("getThreadIntegerRegisterSet0 called\n"); -+ print_debug("getThreadRegisterSet0 called\n"); + /* @test + * @bug 8167409 +- * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs + */ + package compiler.runtime.criticalnatives.argumentcorruption; +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +index eab36f9311..1da369fde2 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +@@ -24,7 +24,7 @@ - struct ps_prochandle* ph = get_proc_handle(env, this_obj); - if (ph != NULL && ph->core != NULL) { -@@ -705,13 +705,7 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo - result = thread_get_state(tid, HSDB_THREAD_STATE, (thread_state_t)&state, &count); + /* @test + * @bug 8167408 +- * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp + */ + package compiler.runtime.criticalnatives.lookup; +diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +index 7774dabcb5..7afe3560f3 100644 +--- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java ++++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -61,15 +61,17 @@ public class IntrinsicPredicates { - if (result != KERN_SUCCESS) { -- // This is not considered fatal. Unlike on Linux and Windows, we haven't seen a -- // failure to get thread registers, but if it were to fail the response should -- // be the same. By ignoring this error and returning NULL, stacking walking code -- // will get null registers and fallback to using the "last java frame" if setup. 
-- fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: thread_get_state failed (%d) for thread (%d)\n", -- result, tid); -- fflush(stdout); -+ print_error("getregs: thread_get_state(%d) failed (%d)\n", tid, result); - return NULL; - } + public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), +- new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); ++ new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))))); -@@ -814,25 +808,25 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo - */ - JNIEXPORT jint JNICALL - Java_sun_jvm_hotspot_debugger_macosx_MacOSXDebuggerLocal_translateTID0( -- JNIEnv *env, jobject this_obj, jint tid) -+ JNIEnv *env, jobject this_obj, jint tid) - { - print_debug("translateTID0 called on tid = 0x%x\n", (int)tid); + public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), +@@ -79,10 +81,11 @@ public class IntrinsicPredicates { + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), +- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); ++ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); - kern_return_t result; - thread_t foreign_tid, usable_tid; - mach_msg_type_name_t type; -- -+ - foreign_tid = tid; -- -+ - task_t gTask = getTask(env, this_obj); -- result = mach_port_extract_right(gTask, foreign_tid, -- MACH_MSG_TYPE_COPY_SEND, -+ result = mach_port_extract_right(gTask, foreign_tid, -+ MACH_MSG_TYPE_COPY_SEND, - &usable_tid, &type); - if (result != KERN_SUCCESS) - return -1; -- -+ - print_debug("translateTID0: 0x%x -> 0x%x\n", foreign_tid, usable_tid); -- -+ - return (jint) usable_tid; - } + public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha512" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha512" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha512" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), +@@ -92,7 +95,7 @@ public class IntrinsicPredicates { + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), + new OrPredicate(new 
CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), +- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); ++ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java -index d0557a7d254..655b450c3fc 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java -@@ -166,7 +166,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException - } catch (InterruptedException x) {} - } - if (lastException != null) { -- throw new DebuggerException(lastException.getMessage(), lastException); -+ throw new DebuggerException(lastException); - } else { - return task; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java -index c52d3a51d54..0d637f30f14 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java + public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE + = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE, +diff --git a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java +index 57256aa5a3..d4d43b01ae 100644 +--- a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java ++++ b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java @@ -1,5 +1,5 @@ /* -- * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -67,12 +67,8 @@ public String toString() { - public ThreadContext getContext() throws IllegalThreadStateException { - long[] data = debugger.getThreadIntegerRegisterSet(unique_thread_id); - ThreadContext context = BsdThreadContextFactory.createThreadContext(debugger); -- // null means we failed to get the register set for some reason. The caller -- // is responsible for dealing with the set of null registers in that case. 
-- if (data != null) { -- for (int i = 0; i < data.length; i++) { -- context.setRegister(i, data[i]); -- } -+ for (int i = 0; i < data.length; i++) { -+ context.setRegister(i, data[i]); - } - return context; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java -index 6a0648f508a..cb6712b58ee 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java -@@ -173,7 +173,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException - } catch (InterruptedException x) {} - } - if (lastException != null) { -- throw new DebuggerException(lastException.getMessage(), lastException); -+ throw new DebuggerException(lastException); - } else { - return task; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java -index 3fe795d34bc..52307b9cdcf 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +@@ -112,7 +112,7 @@ public class CheckForProperDetailStackTrace { + // It's ok for ARM not to have symbols, because it does not support NMT detail + // when targeting thumb2. It's also ok for Windows not to have symbols, because + // they are only available if the symbols file is included with the build. +- if (Platform.isWindows() || Platform.isARM()) { ++ if (Platform.isWindows() || Platform.isARM() || Platform.isRISCV64()) { + return; // we are done + } + output.reportDiagnosticSummary(); +diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +index 127bb6abcd..eab19273ad 100644 +--- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java ++++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java @@ -1,5 +1,5 @@ /* -- * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -73,12 +73,8 @@ public String toString() { - public ThreadContext getContext() throws IllegalThreadStateException { - long[] data = debugger.getThreadIntegerRegisterSet(lwp_id); - ThreadContext context = LinuxThreadContextFactory.createThreadContext(debugger); -- // null means we failed to get the register set for some reason. The caller -- // is responsible for dealing with the set of null registers in that case. 
-- if (data != null) { -- for (int i = 0; i < data.length; i++) { -- context.setRegister(i, data[i]); -- } -+ for (int i = 0; i < data.length; i++) { -+ context.setRegister(i, data[i]); - } - return context; +@@ -239,7 +239,7 @@ public class ReservedStackTest { + return Platform.isAix() || + (Platform.isLinux() && + (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || +- Platform.isX86())) || ++ Platform.isX86() || Platform.isRISCV64())) || + Platform.isOSX() || + Platform.isSolaris(); } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java -index 377650a0a1c..ec5aea35e8c 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java +diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +index 126a43a900..feb4de5388 100644 +--- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java ++++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +@@ -45,7 +45,7 @@ import java.util.Set; + */ + public class TestMutuallyExclusivePlatformPredicates { + private static enum MethodGroup { +- ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), ++ ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), + BITNESS("is32bit", "is64bit"), + OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), + VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), +diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +index 7990c49a1f..abeff80e5e 100644 +--- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java ++++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java @@ -1,5 +1,5 @@ /* -- * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -30,9 +30,9 @@ - - class WindbgAMD64Thread implements ThreadProxy { - private WindbgDebugger debugger; -- private long sysId; // SystemID for Windows thread, stored in OSThread::_thread_id -+ private long sysId; - private boolean gotID; -- private long id; // ThreadID for Windows thread, returned by GetThreadIdBySystemId -+ private long id; - - // The address argument must be the address of the OSThread::_thread_id - WindbgAMD64Thread(WindbgDebugger debugger, Address addr) { -@@ -50,12 +50,8 @@ class WindbgAMD64Thread implements ThreadProxy { - public ThreadContext getContext() throws IllegalThreadStateException { - long[] data = debugger.getThreadIntegerRegisterSet(getThreadID()); - WindbgAMD64ThreadContext context = new WindbgAMD64ThreadContext(debugger); -- // null means we failed to get the register set for some reason. The caller -- // is responsible for dealing with the set of null registers in that case. 
-- if (data != null) { -- for (int i = 0; i < data.length; i++) { -- context.setRegister(i, data[i]); -- } -+ for (int i = 0; i < data.length; i++) { -+ context.setRegister(i, data[i]); - } - return context; - } -@@ -90,7 +86,6 @@ public String toString() { - private long getThreadID() { - if (!gotID) { - id = debugger.getThreadIdFromSysId(sysId); -- gotID = true; +@@ -54,8 +54,8 @@ public class TestCPUInformation { + Events.assertField(event, "hwThreads").atLeast(1); + Events.assertField(event, "cores").atLeast(1); + Events.assertField(event, "sockets").atLeast(1); +- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); +- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); ++ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); ++ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); + } } - - return id; -diff --git a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp -index e3b218b4dae..314cf69c957 100644 ---- a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp -+++ b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp -@@ -45,7 +45,6 @@ - - #include - #include --#include - - #define DEBUG_NO_IMPLEMENTATION - #include -@@ -766,16 +765,9 @@ JNIEXPORT jlong JNICALL Java_sun_jvm_hotspot_debugger_windbg_WindbgDebuggerLocal - CHECK_EXCEPTION_(0); - - ULONG id = 0; -- HRESULT hr = ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id); -- if (hr != S_OK) { -- // This is not considered fatal and does happen on occassion, usually with an -- // 0x80004002 "No such interface supported". The root cause is not fully understood, -- // but by ignoring this error and returning NULL, stacking walking code will get -- // null registers and fallback to using the "last java frame" if setup. -- printf("WARNING: GetThreadIdBySystemId failed with 0x%x for sysId (%" PRIu64 ")\n", -- hr, sysId); -- return -1; -- } -+ COM_VERIFY_OK_(ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id), -+ "Windbg Error: GetThreadIdBySystemId failed!", 0); -+ - return (jlong) id; } +diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java +index 6269373c2b..e1511772e7 100644 +--- a/test/lib/jdk/test/lib/Platform.java ++++ b/test/lib/jdk/test/lib/Platform.java +@@ -205,6 +205,10 @@ public class Platform { + return isArch("arm.*"); + } - -From 4b01e13731fc330ca3d57a5cd532c91bc66579c8 Mon Sep 17 00:00:00 2001 -From: Kuai Wei -Date: Wed, 31 Jan 2024 17:26:31 +0800 -Subject: [PATCH 140/140] Remove unused zSyscall_linux_riscv.hpp - ---- - .../linux_riscv/gc/z/zSyscall_linux_riscv.hpp | 42 ------------------- - 1 file changed, 42 deletions(-) - delete mode 100644 src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp - -diff --git a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp -deleted file mode 100644 -index 1aa58f27871..00000000000 ---- a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp -+++ /dev/null -@@ -1,42 +0,0 @@ --/* -- * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. 
All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#ifndef OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP --#define OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP -- --#include -- --// --// Support for building on older Linux systems --// -- --#ifndef SYS_memfd_create --#define SYS_memfd_create 279 --#endif --#ifndef SYS_fallocate --#define SYS_fallocate 47 --#endif -- --#endif // OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP ++ public static boolean isRISCV64() { ++ return isArch("riscv64"); ++ } ++ + public static boolean isPPC() { + return isArch("ppc.*"); + } diff --git a/LoongArch64-support.patch b/LoongArch64-support.patch index bf78938519963d04f67592ed50d962f0cee255e9..9c2bb8354556a1dcd1cbf01de61e1125db177f12 100644 --- a/LoongArch64-support.patch +++ b/LoongArch64-support.patch @@ -38046,13 +38046,13 @@ index 0000000000..49302590c3 +#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp new file mode 100644 -index 0000000000..3ed4c36651 +index 0000000000..6e27a69747 --- /dev/null +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp -@@ -0,0 +1,1625 @@ +@@ -0,0 +1,1626 @@ +/* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) -+ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * Copyright (c) 2022, 2024, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -38951,7 +38951,7 @@ index 0000000000..3ed4c36651 + b(Q_DONE); + bind(JX_IS_0); + if (UseLASX) { -+ xvfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ xvfmul_d(v28, v18, v6); // f[0,3] * x[0] + fmul_d(v30, v19, v6); // f[4] * x[0] + } else { + vfmul_d(v28, v18, v6); // f[0,1] * x[0] @@ -39180,6 +39180,7 @@ index 0000000000..3ed4c36651 + st_w(tmp2, SCR2, 0); + addi_w(SCR1, SCR1, 24); + addi_w(jz, jz, 1); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); + st_w(tmp3, SCR2, 0); // iq[jz] = (int) fw + b(Z_ZERO_CHECK_DONE); + bind(Z_IS_LESS_THAN_TWO24B); @@ -104792,7 +104793,7 @@ index 3687754e71..791e4ed43f 100644 void generate_c1_load_barrier_stub(LIR_Assembler* ce, ZLoadBarrierStubC1* stub) const; diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp -index 0af357ea56..66a8006780 100644 +index 2842a11f92..4f58ec4be3 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp @@ -23,6 +23,12 @@ @@ -104808,7 +104809,7 @@ index 0af357ea56..66a8006780 100644 // no precompiled headers #include "jvm.h" #include "classfile/classLoader.hpp" -@@ -4068,6 +4074,8 @@ size_t os::Linux::find_large_page_size() { +@@ -4060,6 +4066,8 @@ size_t os::Linux::find_large_page_size() { IA64_ONLY(256 * M) PPC_ONLY(4 * M) S390_ONLY(1 * M) @@ -110338,7 +110339,7 @@ index ce23aafa8f..d3dfb74d5b 100644 assert(_owner != Self, "invariant"); assert(_Responsible != Self, "invariant"); diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp -index e0f4a2af1f..09cc4b1ba5 100644 +index 1c540bb621..0e44240d40 100644 --- a/src/hotspot/share/runtime/os.cpp +++ b/src/hotspot/share/runtime/os.cpp @@ -22,6 +22,12 @@ @@ -110672,7 +110673,7 @@ index 8318e8e021..07064e76ee 100644 // This C bool type must be int for compatibility with Linux calls and // it would be a mistake to equivalence it to C++ bool on many platforms diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -index de5254d859..eefe55959c 100644 +index c22b5d1cb3..36d6343960 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c @@ -22,6 +22,12 @@ @@ -110688,12 +110689,12 @@ index de5254d859..eefe55959c 100644 #include #include #include -@@ -142,7 +148,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - #define PTRACE_GETREGS_REQ PT_GETREGS - #endif - --#ifdef PTRACE_GETREGS_REQ -+#if defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) +@@ -151,7 +157,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + return false; + } + return true; +-#elif defined(PTRACE_GETREGS_REQ) ++#elif defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { print_debug("ptrace(PTRACE_GETREGS, ...) 
failed for lwp %d\n", pid); return false; @@ -116585,7 +116586,7 @@ index 127bb6abcd..c9277604ae 100644 Platform.isSolaris(); } diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -index 77458554b7..05aee6b84c 100644 +index 126a43a900..55bd135f6e 100644 --- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java @@ -45,7 +45,7 @@ import java.util.Set; @@ -116625,35 +116626,8 @@ index 7990c49a1f..025048c6b0 100644 } } } -diff --git a/test/jdk/sun/security/pkcs11/PKCS11Test.java b/test/jdk/sun/security/pkcs11/PKCS11Test.java -index b14daf6c6d..da33514c75 100644 ---- a/test/jdk/sun/security/pkcs11/PKCS11Test.java -+++ b/test/jdk/sun/security/pkcs11/PKCS11Test.java -@@ -21,6 +21,12 @@ - * questions. - */ - -+/* -+ * This file has been modified by Loongson Technology in 2022, These -+ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made -+ * available on the same license terms set forth above. -+ */ -+ - // common infrastructure for SunPKCS11 tests - - import java.io.BufferedReader; -@@ -747,6 +753,9 @@ public abstract class PKCS11Test { - "/usr/lib64/" }); - osMap.put("Linux-ppc64-64", new String[] { "/usr/lib64/" }); - osMap.put("Linux-ppc64le-64", new String[] { "/usr/lib64/" }); -+ osMap.put("Linux-mips64el-64", new String[]{"/usr/lib64/"}); -+ osMap.put("Linux-loongarch64-64", new String[]{"/usr/lib/loongarch64-linux-gnu/", -+ "/usr/lib64/" }); - osMap.put("Linux-s390x-64", new String[] { "/usr/lib64/" }); - osMap.put("Windows-x86-32", new String[] {}); - osMap.put("Windows-amd64-64", new String[] {}); diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java -index 5b3f1889cb..aaf8867a7c 100644 +index 6269373c2b..440ec4664f 100644 --- a/test/lib/jdk/test/lib/Platform.java +++ b/test/lib/jdk/test/lib/Platform.java @@ -21,6 +21,12 @@ @@ -116668,8 +116642,8 @@ index 5b3f1889cb..aaf8867a7c 100644 + package jdk.test.lib; - import java.io.FileNotFoundException; -@@ -226,6 +232,14 @@ public class Platform { + import java.io.BufferedReader; +@@ -229,6 +235,14 @@ public class Platform { return isArch("(i386)|(x86(?!_64))"); } diff --git a/delete_expired_certificates.patch b/delete_expired_certificates.patch index 3e654b7774f145bca6cee0a764b16d15dd779cde..04e75c0b71888621ca579be0cc487ee38509d23f 100644 --- a/delete_expired_certificates.patch +++ b/delete_expired_certificates.patch @@ -116,7 +116,7 @@ diff --git a/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java b/test/jdk/sun index 122a01901..c131bd493 100644 --- a/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java +++ b/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java -@@ -47,12 +47,12 @@ public class VerifyCACerts { +@@ -48,12 +48,12 @@ public class VerifyCACerts { + File.separator + "security" + File.separator + "cacerts"; // The numbers of certs now. 
@@ -131,8 +131,7 @@ index 122a01901..c131bd493 100644 // map of cert alias to SHA-256 fingerprint @SuppressWarnings("serial") - private static final Map FINGERPRINT_MAP = new HashMap<>() { -@@ -109,8 +109,6 @@ public class VerifyCACerts { +@@ -111,8 +111,6 @@ public class VerifyCACerts { "7E:37:CB:8B:4C:47:09:0C:AB:36:55:1B:A6:F4:5D:B8:40:68:0F:BA:16:6A:95:2D:B1:00:71:7F:43:05:3F:C2"); put("digicerthighassuranceevrootca [jdk]", "74:31:E5:F4:C3:C1:CE:46:90:77:4F:0B:61:E0:54:40:88:3B:A9:A0:1E:D0:0B:A6:AB:D7:80:6E:D3:B1:18:CF"); @@ -141,7 +140,7 @@ index 122a01901..c131bd493 100644 put("geotrustprimaryca [jdk]", "37:D5:10:06:C5:12:EA:AB:62:64:21:F1:EC:8C:92:01:3F:C5:F8:2A:E9:8E:E5:33:EB:46:19:B8:DE:B4:D0:6C"); put("geotrustprimarycag2 [jdk]", -@@ -145,10 +143,6 @@ public class VerifyCACerts { +@@ -147,10 +145,6 @@ public class VerifyCACerts { "96:BC:EC:06:26:49:76:F3:74:60:77:9A:CF:28:C5:A7:CF:E8:A3:C0:AA:E1:1A:8F:FC:EE:05:C0:BD:DF:08:C6"); put("letsencryptisrgx2 [jdk]", "69:72:9B:8E:15:A8:6E:FC:17:7A:57:AF:B7:17:1D:FC:64:AD:D2:8C:2F:CA:8C:F1:50:7E:34:45:3C:CB:14:70"); @@ -152,7 +151,7 @@ index 122a01901..c131bd493 100644 put("quovadisrootca1g3 [jdk]", "8A:86:6F:D1:B2:76:B5:7E:57:8E:92:1C:65:82:8A:2B:ED:58:E9:F2:F2:88:05:41:34:B7:F1:F4:BF:C9:CC:74"); put("quovadisrootca2 [jdk]", -@@ -282,12 +276,6 @@ public class VerifyCACerts { +@@ -292,12 +286,6 @@ public class VerifyCACerts { add("addtrustexternalca [jdk]"); // Valid until: Sat May 30 10:44:50 GMT 2020 add("addtrustqualifiedca [jdk]"); diff --git a/openjdk-11.spec b/openjdk-11.spec index 3b4b978d0117d460220a5e5c47b7bc8c5c5cf565..9c7776aac3f57a4f1d083ed71dafd10f62d691b9 100644 --- a/openjdk-11.spec +++ b/openjdk-11.spec @@ -762,7 +762,7 @@ Provides: java-src%{?1} = %{epoch}:%{version}-%{release} Name: java-%{javaver}-%{origin} Version: %{newjavaver}.%{buildver} -Release: 0 +Release: 6 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons # and this change was brought into RHEL-4. java-1.5.0-ibm packages # also included the epoch in their virtual provides. 
This created a @@ -791,7 +791,7 @@ Group: Development/Languages # The test code includes copies of NSS under the Mozilla Public License v2.0 # The PCSClite headers are under a BSD with advertising license # The elliptic curve cryptography (ECC) source code is licensed under the LGPLv2.1 or any later version -License: ASL 1.1 and ASL 2.0 and BSD and BSD with advertising and GPL+ and GPLv2 and GPLv2 with exceptions and IJG and LGPLv2+ and MIT and MPLv2.0 and Public Domain and W3C and zlib and ISC and FTL and RSA +License: ASL 1.1 and ASL 2.0 and BSD and BSD with advertising and GPL+ and GPLv2 and GPLv2 with exceptions and IJG and LGPLv2+ and MIT and MPLv2.0 and Public Domain and W3C and zlib and ISC and FTL and RSA-MD URL: http://openjdk.java.net/ @@ -1588,9 +1588,10 @@ else end end -- run content of included file with fake args +arg = nil; -- it is better to null the arg up, no meter if they exists or not, and use cjc as module in unified way, instead of relaying on "main" method during require "copy_jdk_configs.lua" cjc = require "copy_jdk_configs.lua" -arg = {"--currentjvm", "%{uniquesuffix %{nil}}", "--jvmdir", "%{_jvmdir %{nil}}", "--origname", "%{name}", "--origjavaver", "%{javaver}", "--arch", "%{_arch}", "--temp", "%{rpm_state_dir}/%{name}.%{_arch}"} -cjc.mainProgram(arg) +args = {"--currentjvm", "%{uniquesuffix %{nil}}", "--jvmdir", "%{_jvmdir %{nil}}", "--origname", "%{name}", "--origjavaver", "%{javaver}", "--arch", "%{_arch}", "--temp", "%{rpm_state_dir}/%{name}.%{_arch}"} +cjc.mainProgram(args) -- the returns from copy_jdk_configs.lua should not affect this 'main', so it should run under all circumstances, except fatal error %post %{post_script %{nil}} @@ -1715,7 +1716,25 @@ cjc.mainProgram(arg) %changelog -* Thu July 18 2024 DXwangg - 1.11.0.24.8-0 +* Fri Aug 30 2024 songliyang - 1.11.0.24.8-6 +- update License + +* Thu Aug 1 2024 aoqi - 1.11.0.24.8-5 +- update LoongArch64 port to 11.0.24 + +* Thu July 29 2024 DXwangg - 1.11.0.24.8-4 +- modified delete_expired_certificates.patch + +* Thu Jul 25 2024 songliyang - 1.11.0.24.8-3 +- update Loongarch support patch to fix the error while applying in prep stage + +* Tue Jul 23 2024 songliyang - 1.11.0.24.8-2 +- null the arg to solve openjdk-headless install error + +* Thu Jul 18 2024 Dingli Zhang - 1.11.0.24.8-1 +- update riscv64 port to 11.0.24 + +* Thu Jul 18 2024 DXwangg - 1.11.0.24.8-0 - update to 11.0.24+8(GA) * Thu Jun 20 2024 aoqi - 1.11.0.23.9-6