diff --git a/Add-riscv64-support.patch b/Add-riscv64-support.patch index 59017ae02c5b6c185a441f428acd08dfc203eb6e..b06b0fe866fb0ae52a579323d4ef4e80effd0c9b 100644 --- a/Add-riscv64-support.patch +++ b/Add-riscv64-support.patch @@ -1,346 +1,129 @@ -From dfa792539047c39d0d25244265bc8368163d5768 Mon Sep 17 00:00:00 2001 -From: Fei Yang -Date: Thu, 24 Mar 2022 09:22:46 +0000 -Subject: [PATCH 001/140] Cherry-picked JDK-8276799: initial load of RISC-V - backend (cannot pass compilation) - ---- - make/autoconf/build-aux/config.guess | 2 +- - make/autoconf/hotspot.m4 | 3 +- - make/autoconf/libraries.m4 | 8 +- - make/autoconf/platform.m4 | 6 +- - make/hotspot/gensrc/GensrcAdlc.gmk | 9 +- - .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 6 +- - src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp | 7 +- - src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp | 8 +- - .../cpu/riscv/abstractInterpreter_riscv.cpp | 177 + - src/hotspot/cpu/riscv/assembler_riscv.cpp | 372 + - src/hotspot/cpu/riscv/assembler_riscv.hpp | 3047 +++++ - .../cpu/riscv/assembler_riscv.inline.hpp | 47 + - src/hotspot/cpu/riscv/bytes_riscv.hpp | 167 + - src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 353 + - src/hotspot/cpu/riscv/c1_Defs_riscv.hpp | 84 + - .../cpu/riscv/c1_FpuStackSim_riscv.cpp | 30 + - .../cpu/riscv/c1_FpuStackSim_riscv.hpp | 32 + - src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 388 + - src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp | 148 + - .../cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 281 + - .../cpu/riscv/c1_LIRAssembler_arith_riscv.hpp | 37 + - .../riscv/c1_LIRAssembler_arraycopy_riscv.cpp | 388 + - .../riscv/c1_LIRAssembler_arraycopy_riscv.hpp | 52 + - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 2267 ++++ - .../cpu/riscv/c1_LIRAssembler_riscv.hpp | 132 + - .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 1075 ++ - src/hotspot/cpu/riscv/c1_LIR_riscv.cpp | 55 + - src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp | 33 + - src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp | 83 + - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 432 + - .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 120 + - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1172 ++ - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 65 + - .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 1646 +++ - .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 193 + - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 83 + - src/hotspot/cpu/riscv/c2_init_riscv.cpp | 38 + - .../riscv/c2_safepointPollStubTable_riscv.cpp | 47 + - src/hotspot/cpu/riscv/codeBuffer_riscv.hpp | 36 + - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 149 + - src/hotspot/cpu/riscv/copy_riscv.hpp | 136 + - src/hotspot/cpu/riscv/disassembler_riscv.hpp | 58 + - .../cpu/riscv/foreign_globals_riscv.cpp | 44 + - .../cpu/riscv/foreign_globals_riscv.hpp | 32 + - src/hotspot/cpu/riscv/frame_riscv.cpp | 697 + - src/hotspot/cpu/riscv/frame_riscv.hpp | 202 + - src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 248 + - .../gc/g1/g1BarrierSetAssembler_riscv.cpp | 484 + - .../gc/g1/g1BarrierSetAssembler_riscv.hpp | 78 + - .../cpu/riscv/gc/g1/g1Globals_riscv.hpp | 31 + - .../gc/shared/barrierSetAssembler_riscv.cpp | 302 + - .../gc/shared/barrierSetAssembler_riscv.hpp | 79 + - .../gc/shared/barrierSetNMethod_riscv.cpp | 171 + - .../cardTableBarrierSetAssembler_riscv.cpp | 111 + - .../cardTableBarrierSetAssembler_riscv.hpp | 42 + - .../modRefBarrierSetAssembler_riscv.cpp | 55 + - .../modRefBarrierSetAssembler_riscv.hpp | 55 + - .../c1/shenandoahBarrierSetC1_riscv.cpp | 117 + - .../shenandoahBarrierSetAssembler_riscv.cpp | 712 ++ - .../shenandoahBarrierSetAssembler_riscv.hpp | 88 + 
- .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 285 + - .../riscv/gc/z/zBarrierSetAssembler_riscv.cpp | 441 + - .../riscv/gc/z/zBarrierSetAssembler_riscv.hpp | 101 + - src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp | 212 + - src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp | 36 + - src/hotspot/cpu/riscv/gc/z/z_riscv64.ad | 233 + - .../cpu/riscv/globalDefinitions_riscv.hpp | 52 + - src/hotspot/cpu/riscv/globals_riscv.hpp | 99 + - src/hotspot/cpu/riscv/icBuffer_riscv.cpp | 79 + - src/hotspot/cpu/riscv/icache_riscv.cpp | 51 + - src/hotspot/cpu/riscv/icache_riscv.hpp | 42 + - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1940 +++ - src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 285 + - src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 295 + - src/hotspot/cpu/riscv/interpreterRT_riscv.hpp | 68 + - .../cpu/riscv/javaFrameAnchor_riscv.hpp | 86 + - .../cpu/riscv/jniFastGetField_riscv.cpp | 214 + - src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 106 + - .../cpu/riscv/macroAssembler_riscv.cpp | 4016 ++++++ - .../cpu/riscv/macroAssembler_riscv.hpp | 858 ++ - .../cpu/riscv/macroAssembler_riscv.inline.hpp | 31 + - src/hotspot/cpu/riscv/matcher_riscv.hpp | 169 + - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 461 + - src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 57 + - src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 429 + - src/hotspot/cpu/riscv/nativeInst_riscv.hpp | 572 + - src/hotspot/cpu/riscv/registerMap_riscv.cpp | 45 + - src/hotspot/cpu/riscv/registerMap_riscv.hpp | 43 + - src/hotspot/cpu/riscv/register_riscv.cpp | 73 + - src/hotspot/cpu/riscv/register_riscv.hpp | 324 + - src/hotspot/cpu/riscv/relocInfo_riscv.cpp | 113 + - src/hotspot/cpu/riscv/relocInfo_riscv.hpp | 44 + - src/hotspot/cpu/riscv/riscv.ad | 10611 ++++++++++++++++ - src/hotspot/cpu/riscv/riscv_b.ad | 527 + - src/hotspot/cpu/riscv/riscv_v.ad | 2065 +++ - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2761 ++++ - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 3864 ++++++ - src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 58 + - src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 161 + - .../templateInterpreterGenerator_riscv.cpp | 1794 +++ - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 3951 ++++++ - src/hotspot/cpu/riscv/templateTable_riscv.hpp | 42 + - .../riscv/universalNativeInvoker_riscv.cpp | 33 + - .../cpu/riscv/universalUpcallHandle_riscv.cpp | 42 + - src/hotspot/cpu/riscv/vmStructs_riscv.hpp | 42 + - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 230 + - src/hotspot/cpu/riscv/vm_version_riscv.hpp | 72 + - src/hotspot/cpu/riscv/vmreg_riscv.cpp | 64 + - src/hotspot/cpu/riscv/vmreg_riscv.hpp | 68 + - src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp | 46 + - src/hotspot/cpu/riscv/vtableStubs_riscv.cpp | 260 + - src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp | 9 +- - src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 7 +- - src/hotspot/os/linux/os_linux.cpp | 2 + - .../linux_riscv/assembler_linux_riscv.cpp | 26 + - .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 134 + - .../os_cpu/linux_riscv/bytes_linux_riscv.hpp | 45 + - .../os_cpu/linux_riscv/copy_linux_riscv.hpp | 31 + - .../linux_riscv/gc/z/zSyscall_linux_riscv.hpp | 42 + - .../linux_riscv/globals_linux_riscv.hpp | 43 + - .../linux_riscv/orderAccess_linux_riscv.hpp | 63 + - .../os_cpu/linux_riscv/os_linux_riscv.cpp | 466 + - .../os_cpu/linux_riscv/os_linux_riscv.hpp | 59 + - .../prefetch_linux_riscv.inline.hpp | 38 + - .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 92 + - .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 48 + - .../linux_riscv/vmStructs_linux_riscv.hpp | 55 + - 
.../linux_riscv/vm_version_linux_riscv.cpp | 118 + - src/hotspot/share/c1/c1_LIR.cpp | 112 +- - src/hotspot/share/c1/c1_LIR.hpp | 209 +- - src/hotspot/share/c1/c1_LIRAssembler.cpp | 15 +- - src/hotspot/share/c1/c1_LIRAssembler.hpp | 5 +- - src/hotspot/share/c1/c1_LinearScan.cpp | 18 +- - .../gc/shenandoah/shenandoahArguments.cpp | 4 +- - src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp | 4 +- - .../share/jfr/utilities/jfrBigEndian.hpp | 2 +- - src/hotspot/share/opto/regmask.hpp | 2 +- - .../share/runtime/abstract_vm_version.cpp | 3 +- - src/hotspot/share/runtime/synchronizer.cpp | 2 +- - src/hotspot/share/runtime/thread.hpp | 2 +- - src/hotspot/share/runtime/thread.inline.hpp | 4 +- - src/hotspot/share/utilities/macros.hpp | 26 + - .../native/libsaproc/LinuxDebuggerLocal.c | 49 +- - .../linux/native/libsaproc/libproc.h | 4 +- - .../classes/sun/jvm/hotspot/HotSpotAgent.java | 3 + - .../debugger/MachineDescriptionRISCV64.java | 40 + - .../debugger/linux/LinuxCDebugger.java | 13 +- - .../linux/riscv64/LinuxRISCV64CFrame.java | 90 + - .../riscv64/LinuxRISCV64ThreadContext.java | 48 + - .../proc/riscv64/ProcRISCV64Thread.java | 88 + - .../riscv64/ProcRISCV64ThreadContext.java | 48 + - .../riscv64/ProcRISCV64ThreadFactory.java | 46 + - .../remote/riscv64/RemoteRISCV64Thread.java | 55 + - .../riscv64/RemoteRISCV64ThreadContext.java | 48 + - .../riscv64/RemoteRISCV64ThreadFactory.java | 46 + - .../debugger/risv64/RISCV64ThreadContext.java | 172 + - .../sun/jvm/hotspot/runtime/Threads.java | 5 +- - .../LinuxRISCV64JavaThreadPDAccess.java | 134 + - .../riscv64/RISCV64CurrentFrameGuess.java | 223 + - .../hotspot/runtime/riscv64/RISCV64Frame.java | 556 + - .../riscv64/RISCV64JavaCallWrapper.java | 61 + - .../runtime/riscv64/RISCV64RegisterMap.java | 53 + - .../jvm/hotspot/utilities/PlatformInfo.java | 4 +- - test/hotspot/jtreg/compiler/c2/TestBit.java | 7 +- - ...eSHA1IntrinsicsOptionOnUnsupportedCPU.java | 5 +- - ...HA256IntrinsicsOptionOnUnsupportedCPU.java | 5 +- - ...HA512IntrinsicsOptionOnUnsupportedCPU.java | 5 +- - .../cli/TestUseSHAOptionOnUnsupportedCPU.java | 5 +- - .../testcases/GenericTestCaseForOtherCPU.java | 11 +- - ...nericTestCaseForUnsupportedRISCV64CPU.java | 115 + - .../loopopts/superword/ProdRed_Double.java | 4 +- - .../loopopts/superword/ProdRed_Float.java | 4 +- - .../loopopts/superword/ProdRed_Int.java | 4 +- - .../loopopts/superword/ReductionPerf.java | 4 +- - .../superword/SumRedAbsNeg_Double.java | 4 +- - .../superword/SumRedAbsNeg_Float.java | 4 +- - .../loopopts/superword/SumRedSqrt_Double.java | 4 +- - .../loopopts/superword/SumRed_Double.java | 4 +- - .../loopopts/superword/SumRed_Float.java | 4 +- - .../loopopts/superword/SumRed_Int.java | 4 +- - .../sha/predicate/IntrinsicPredicates.java | 11 +- - .../NMT/CheckForProperDetailStackTrace.java | 4 +- - .../ReservedStack/ReservedStackTest.java | 4 +- - .../HeapMonitorEventsForTwoThreadsTest.java | 1 - - ...stMutuallyExclusivePlatformPredicates.java | 2 +- - .../jdk/jfr/event/os/TestCPUInformation.java | 6 +- - test/lib/jdk/test/lib/Platform.java | 4 + - 187 files changed, 59079 insertions(+), 189 deletions(-) - create mode 100644 src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/bytes_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp - create mode 100644 
src/hotspot/cpu/riscv/c1_Defs_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIR_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c2_globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c2_init_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/codeBuffer_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/compiledIC_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/copy_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/disassembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/frame_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/frame_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/frame_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad - create mode 100644 src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp - create mode 100644 
src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/z/z_riscv64.ad - create mode 100644 src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/icBuffer_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/icache_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/icache_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/interp_masm_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/interp_masm_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/interpreterRT_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/interpreterRT_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/jniTypes_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/matcher_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/register_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/register_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/relocInfo_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/relocInfo_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/riscv.ad - create mode 100644 src/hotspot/cpu/riscv/riscv_b.ad - create mode 100644 src/hotspot/cpu/riscv/riscv_v.ad - create mode 100644 src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/stubGenerator_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/stubRoutines_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/stubRoutines_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/templateTable_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/templateTable_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vmStructs_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/vtableStubs_riscv.cpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp - create mode 100644 
src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java - create mode 100644 test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java - -diff --git a/make/autoconf/build-aux/config.guess b/make/autoconf/build-aux/config.guess -index a88a9adec3f..15111d827ab 100644 ---- a/make/autoconf/build-aux/config.guess -+++ b/make/autoconf/build-aux/config.guess +diff --git a/.github/workflows/build-cross-compile.yml b/.github/workflows/build-cross-compile.yml +index 385b097b9f..b1c333f711 100644 +--- a/.github/workflows/build-cross-compile.yml ++++ b/.github/workflows/build-cross-compile.yml +@@ -54,28 +54,39 @@ jobs: + - arm + - s390x + - ppc64le ++ - riscv64 + include: + - target-cpu: aarch64 + gnu-arch: aarch64 + debian-arch: arm64 + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ tolerate-sysroot-errors: false + - target-cpu: arm + gnu-arch: arm + debian-arch: armhf + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ 
tolerate-sysroot-errors: false + gnu-abi: eabihf + - target-cpu: s390x + gnu-arch: s390x + debian-arch: s390x + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ tolerate-sysroot-errors: false + - target-cpu: ppc64le + gnu-arch: powerpc64le + debian-arch: ppc64el + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ tolerate-sysroot-errors: false ++ - target-cpu: riscv64 ++ gnu-arch: riscv64 ++ debian-arch: riscv64 ++ debian-repository: https://httpredir.debian.org/debian/ ++ debian-version: sid ++ tolerate-sysroot-errors: true + + steps: + - name: 'Checkout the JDK source' +@@ -113,6 +124,7 @@ jobs: + if: steps.get-cached-sysroot.outputs.cache-hit != 'true' + + - name: 'Create sysroot' ++ id: create-sysroot + run: > + sudo debootstrap + --arch=${{ matrix.debian-arch }} +@@ -123,6 +135,7 @@ jobs: + ${{ matrix.debian-version }} + sysroot + ${{ matrix.debian-repository }} ++ continue-on-error: ${{ matrix.tolerate-sysroot-errors }} + if: steps.get-cached-sysroot.outputs.cache-hit != 'true' + + - name: 'Prepare sysroot' +@@ -134,7 +147,12 @@ jobs: + rm -rf sysroot/usr/{sbin,bin,share} + rm -rf sysroot/usr/lib/{apt,gcc,udev,systemd} + rm -rf sysroot/usr/libexec/gcc +- if: steps.get-cached-sysroot.outputs.cache-hit != 'true' ++ if: steps.create-sysroot.outcome == 'success' && steps.get-cached-sysroot.outputs.cache-hit != 'true' ++ ++ - name: 'Remove broken sysroot' ++ run: | ++ sudo rm -rf sysroot/ ++ if: steps.create-sysroot.outcome != 'success' && steps.get-cached-sysroot.outputs.cache-hit != 'true' + + - name: 'Configure' + run: > +@@ -153,6 +171,7 @@ jobs: + echo "Dumping config.log:" && + cat config.log && + exit 1) ++ if: steps.create-sysroot.outcome == 'success' || steps.get-cached-sysroot.outputs.cache-hit == 'true' + + - name: 'Build' + id: build +@@ -160,3 +179,4 @@ jobs: + with: + make-target: 'hotspot ${{ inputs.make-arguments }}' + platform: linux-${{ matrix.target-cpu }} ++ if: steps.create-sysroot.outcome == 'success' || steps.get-cached-sysroot.outputs.cache-hit == 'true' +diff --git a/.jcheck/conf b/.jcheck/conf +index be7ad6d26f..e35eb77696 100644 +--- a/.jcheck/conf ++++ b/.jcheck/conf +@@ -1,5 +1,5 @@ + [general] +-project=jdk-updates ++project=riscv-port + jbs=JDK + version=11.0.24 + +diff --git a/make/autoconf/build-aux/config.sub b/make/autoconf/build-aux/config.sub +index 3c280ac7c0..6c66c221e0 100644 +--- a/make/autoconf/build-aux/config.sub ++++ b/make/autoconf/build-aux/config.sub @@ -1,6 +1,6 @@ #!/bin/sh # --# Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights reserved. -+# Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. - # Copyright (c) 2021, Azul Systems, Inc. All rights reserved. +-# Copyright (c) 2014, 2020, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # + # This code is free software; you can redistribute it and/or modify it +@@ -40,6 +40,13 @@ if echo $* | grep pc-msys >/dev/null ; then + exit + fi + ++# Canonicalize for riscv which autoconf-config.sub doesn't handle ++if echo $* | grep '^riscv\(32\|64\)-linux' >/dev/null ; then ++ result=`echo $@ | sed 's/linux/unknown-linux/'` ++ echo $result ++ exit ++fi ++ + # First, filter out everything that doesn't begin with "aarch64-" + if ! echo $* | grep '^aarch64-' >/dev/null ; then + . 
$DIR/autoconf-config.sub "$@" diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 -index 9bb34363e5c..f84e8f84c60 100644 +index 9bb34363e5..f84e8f84c6 100644 --- a/make/autoconf/hotspot.m4 +++ b/make/autoconf/hotspot.m4 @@ -370,7 +370,8 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], @@ -353,32 +136,8 @@ index 9bb34363e5c..f84e8f84c60 100644 AC_MSG_RESULT([yes]) else DISABLED_JVM_FEATURES="$DISABLED_JVM_FEATURES shenandoahgc" -diff --git a/make/autoconf/libraries.m4 b/make/autoconf/libraries.m4 -index 16e906bdc6a..5c49fd9285d 100644 ---- a/make/autoconf/libraries.m4 -+++ b/make/autoconf/libraries.m4 -@@ -1,5 +1,5 @@ - # --# Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. -+# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. - # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - # - # This code is free software; you can redistribute it and/or modify it -@@ -130,6 +130,12 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], - BASIC_JVM_LIBS="$BASIC_JVM_LIBS -lthread" - fi - -+ # Because RISC-V only has word-sized atomics, it requries libatomic where -+ # other common architectures do not. So link libatomic by default. -+ if test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xriscv64; then -+ BASIC_JVM_LIBS="$BASIC_JVM_LIBS -latomic" -+ fi -+ - # perfstat lib - if test "x$OPENJDK_TARGET_OS" = xaix; then - BASIC_JVM_LIBS="$BASIC_JVM_LIBS -lperfstat" diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 -index 26a58eb2ee8..67972d89248 100644 +index 5d1d9efa39..565ca18e20 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 @@ -1,5 +1,5 @@ @@ -397,17 +156,8 @@ index 26a58eb2ee8..67972d89248 100644 # The cpu defines below are for zero, we don't support them directly. elif test "x$OPENJDK_$1_CPU" = xsparc; then -@@ -564,8 +566,6 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], - HOTSPOT_$1_CPU_DEFINE=S390 - elif test "x$OPENJDK_$1_CPU" = xs390x; then - HOTSPOT_$1_CPU_DEFINE=S390 -- elif test "x$OPENJDK_$1_CPU" = xriscv64; then -- HOTSPOT_$1_CPU_DEFINE=RISCV - elif test "x$OPENJDK_$1_CPU" = xloongarch64; then - HOTSPOT_$1_CPU_DEFINE=LOONGARCH64 - elif test "x$OPENJDK_$1_CPU" != x; then diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk -index c5a3ac5724b..67f4c6f0574 100644 +index c5a3ac5724..51137b99db 100644 --- a/make/hotspot/gensrc/GensrcAdlc.gmk +++ b/make/hotspot/gensrc/GensrcAdlc.gmk @@ -1,5 +1,5 @@ @@ -417,13 +167,12 @@ index c5a3ac5724b..67f4c6f0574 100644 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # This code is free software; you can redistribute it and/or modify it -@@ -150,6 +150,13 @@ ifeq ($(call check-jvm-feature, compiler2), true) +@@ -150,6 +150,12 @@ ifeq ($(call check-jvm-feature, compiler2), true) $d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \ ))) + ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ -+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_v.ad \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_b.ad \ + ))) + endif @@ -431,79 +180,9 @@ index c5a3ac5724b..67f4c6f0574 100644 ifeq ($(call check-jvm-feature, shenandoahgc), true) AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ -diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp -index fdd2c0ca3d7..63f193de86e 100644 ---- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp -+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * -@@ -1593,7 +1593,9 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - } - - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on aarch64"); - - Assembler::Condition acond, ncond; - switch (condition) { -diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp -index f0a7229aa18..cb095052534 100644 ---- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp -+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -1824,7 +1824,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - } - - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on arm"); -+ - AsmCondition acond = al; - AsmCondition ncond = nv; - if (opr1 != opr2) { -diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -index 847f7d61d2f..d74db914331 100644 ---- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -@@ -1,6 +1,6 @@ - /* -- * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2012, 2019, SAP SE. 
All rights reserved. -+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2012, 2021 SAP SE. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -1553,8 +1553,10 @@ inline void load_to_reg(LIR_Assembler *lasm, LIR_Opr src, LIR_Opr dst) { - } - } - -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on ppc"); - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { - if (opr1->is_equal(opr2) || opr1->is_same_register(opr2)) { - load_to_reg(this, opr1, result); // Condition doesn't matter. - return; diff --git a/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp new file mode 100644 -index 00000000000..31c63abe71d +index 0000000000..31c63abe71 --- /dev/null +++ b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp @@ -0,0 +1,177 @@ @@ -686,10 +365,10 @@ index 00000000000..31c63abe71d +} diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp new file mode 100644 -index 00000000000..f15ef5304c5 +index 0000000000..a83d43a8f1 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp -@@ -0,0 +1,372 @@ +@@ -0,0 +1,365 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. @@ -974,9 +653,9 @@ index 00000000000..f15ef5304c5 + } +#endif + assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (uintptr_t)-1), -+ "48-bit overflow in address constant"); -+ // Load upper 32 bits -+ int32_t imm = imm64 >> 16; ++ "bit 47 overflows in address constant"); ++ // Load upper 31 bits ++ int32_t imm = imm64 >> 17; + int64_t upper = imm, lower = imm; + lower = (lower << 52) >> 52; + upper -= lower; @@ -984,13 +663,13 @@ index 00000000000..f15ef5304c5 + lui(Rd, upper); + addi(Rd, Rd, lower); + -+ // Load the rest 16 bits. ++ // Load the rest 17 bits. + slli(Rd, Rd, 11); -+ addi(Rd, Rd, (imm64 >> 5) & 0x7ff); -+ slli(Rd, Rd, 5); ++ addi(Rd, Rd, (imm64 >> 6) & 0x7ff); ++ slli(Rd, Rd, 6); + + // This offset will be used by following jalr/ld. -+ offset = imm64 & 0x1f; ++ offset = imm64 & 0x3f; +} + +void Assembler::movptr(Register Rd, uintptr_t imm64) { @@ -1003,13 +682,6 @@ index 00000000000..f15ef5304c5 + addi(Rd, Rd, offset); +} + -+void Assembler::ifence() { -+ fence_i(); -+ if (UseConservativeFence) { -+ fence(ir, ir); -+ } -+} -+ +#define INSN(NAME, NEG_INSN) \ + void Assembler::NAME(Register Rs, Register Rt, const address &dest) { \ + NEG_INSN(Rt, Rs, dest); \ @@ -1064,10 +736,10 @@ index 00000000000..f15ef5304c5 +} diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp new file mode 100644 -index 00000000000..4923962a496 +index 0000000000..9e7d271860 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -0,0 +1,3047 @@ +@@ -0,0 +1,3057 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. 
@@ -1253,13 +925,22 @@ index 00000000000..4923962a496 + : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { } + Address(Register r) + : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } -+ -+ template::value)> -+ Address(Register r, T o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) {} -+ ++ Address(Register r, int o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, long long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, unsigned int o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, unsigned long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, unsigned long long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++#ifdef ASSERT + Address(Register r, ByteSize disp) -+ : Address(r, in_bytes(disp)) {} ++ : _base(r), _index(noreg), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(0) { } ++#endif + Address(address target, RelocationHolder const& rspec) + : _base(noreg), + _index(noreg), @@ -1338,14 +1019,6 @@ index 00000000000..4923962a496 + + enum { instruction_size = 4 }; + -+ //---< calculate length of instruction >--- -+ // We just use the values set above. -+ // instruction must start at passed address -+ static unsigned int instr_len(unsigned char *instr) { return instruction_size; } -+ -+ //---< longest instructions >--- -+ static unsigned int instr_maxlen() { return instruction_size; } -+ + enum RoundingMode { + rne = 0b000, // round to Nearest, ties to Even + rtz = 0b001, // round towards Zero @@ -1387,7 +1060,6 @@ index 00000000000..4923962a496 + void movptr(Register Rd, address addr); + void movptr_with_offset(Register Rd, address addr, int32_t &offset); + void movptr(Register Rd, uintptr_t imm64); -+ void ifence(); + void j(const address &dest, Register temp = t0); + void j(const Address &adr, Register temp = t0); + void j(Label &l, Register temp = t0); @@ -1966,7 +1638,6 @@ index 00000000000..4923962a496 + emit(insn); \ + } + -+ INSN(fence_i, 0b0001111, 0b001, 0b000000000000); + INSN(ecall, 0b1110011, 0b000, 0b000000000000); + INSN(_ebreak, 0b1110011, 0b000, 0b000000000001); + @@ -3014,6 +2685,7 @@ index 00000000000..4923962a496 + +// ==================================== +// RISC-V Bit-Manipulation Extension ++// Currently only support Zba, Zbb and Zbs bitmanip extensions. 
+// ==================================== +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ @@ -3088,6 +2760,7 @@ index 00000000000..4923962a496 + + INSN(rori, 0b0010011, 0b101, 0b011000); + INSN(slli_uw, 0b0011011, 0b001, 0b000010); ++ INSN(bexti, 0b0010011, 0b101, 0b010010); + +#undef INSN + @@ -4097,6 +3770,13 @@ index 00000000000..4923962a496 + Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(false) { + } + ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ ShouldNotCallThis(); ++ return RegisterOrConstant(); ++ } ++ + // Stack overflow checking + virtual void bang_stack_with_offset(int offset) { Unimplemented(); } + @@ -4114,10 +3794,12 @@ index 00000000000..4923962a496 + virtual ~Assembler() {} +}; + ++class BiasedLockingCounters; ++ +#endif // CPU_RISCV_ASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp new file mode 100644 -index 00000000000..7ffe8803985 +index 0000000000..7ffe880398 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp @@ -0,0 +1,47 @@ @@ -4170,10 +3852,10 @@ index 00000000000..7ffe8803985 +#endif // CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp new file mode 100644 -index 00000000000..23d982f9abd +index 0000000000..f60e0e38ae --- /dev/null +++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp -@@ -0,0 +1,167 @@ +@@ -0,0 +1,165 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016 SAP SE. All rights reserved. @@ -4203,8 +3885,6 @@ index 00000000000..23d982f9abd +#ifndef CPU_RISCV_BYTES_RISCV_HPP +#define CPU_RISCV_BYTES_RISCV_HPP + -+#include "memory/allStatic.hpp" -+ +class Bytes: AllStatic { + public: + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering @@ -4343,10 +4023,10 @@ index 00000000000..23d982f9abd +#endif // CPU_RISCV_BYTES_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp new file mode 100644 -index 00000000000..dcd0472c540 +index 0000000000..12980c12de --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -@@ -0,0 +1,353 @@ +@@ -0,0 +1,339 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
@@ -4388,20 +4068,6 @@ index 00000000000..dcd0472c540 + +#define __ ce->masm()-> + -+void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { -+ __ bind(_entry); -+ InternalAddress safepoint_pc(__ pc() - __ offset() + safepoint_offset()); -+ __ code_section()->relocate(__ pc(), safepoint_pc.rspec()); -+ __ la(t0, safepoint_pc.target()); -+ __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); -+ -+ assert(SharedRuntime::polling_page_return_handler_blob() != NULL, -+ "polling page return stub not created yet"); -+ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); -+ -+ __ far_jump(RuntimeAddress(stub)); -+} -+ +void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + Metadata *m = _method->as_constant_ptr()->as_metadata(); @@ -4421,7 +4087,7 @@ index 00000000000..dcd0472c540 +} + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) -+ : _index(index), _array(), _throw_index_out_of_bounds_exception(true) { ++ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} @@ -4446,7 +4112,7 @@ index 00000000000..dcd0472c540 + if (_throw_index_out_of_bounds_exception) { + stub_id = Runtime1::throw_index_exception_id; + } else { -+ assert(_array != LIR_Opr::nullOpr(), "sanity"); ++ assert(_array != NULL, "sanity"); + __ mv(t1, _array->as_pointer_register()); + stub_id = Runtime1::throw_range_check_failed_id; + } @@ -4653,7 +4319,7 @@ index 00000000000..dcd0472c540 + const int args_num = 5; + VMRegPair args[args_num]; + BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; -+ SharedRuntime::java_calling_convention(signature, args, args_num); ++ SharedRuntime::java_calling_convention(signature, args, args_num, true); + + // push parameters + Register r[args_num]; @@ -4702,7 +4368,7 @@ index 00000000000..dcd0472c540 +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp new file mode 100644 -index 00000000000..4417ad63091 +index 0000000000..4417ad6309 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp @@ -0,0 +1,84 @@ @@ -4792,7 +4458,7 @@ index 00000000000..4417ad63091 +#endif // CPU_RISCV_C1_DEFS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp new file mode 100644 -index 00000000000..e3a2606c532 +index 0000000000..e3a2606c53 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp @@ -0,0 +1,30 @@ @@ -4828,7 +4494,7 @@ index 00000000000..e3a2606c532 +// No FPU stack on RISCV diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp new file mode 100644 -index 00000000000..7bc3d311501 +index 0000000000..7bc3d31150 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp @@ -0,0 +1,32 @@ @@ -4866,7 +4532,7 @@ index 00000000000..7bc3d311501 +#endif // CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp new file mode 100644 -index 00000000000..172031941b2 +index 0000000000..682ebe8262 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp @@ -0,0 +1,388 @@ @@ -5028,8 +4694,8 @@ index 00000000000..172031941b2 +LIR_Opr FrameMap::fpu10_float_opr; +LIR_Opr FrameMap::fpu10_double_opr; + -+LIR_Opr FrameMap::_caller_save_cpu_regs[] = {}; -+LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; ++LIR_Opr 
FrameMap::_caller_save_cpu_regs[] = { 0, }; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; + +//-------------------------------------------------------- +// FrameMap @@ -5186,7 +4852,7 @@ index 00000000000..172031941b2 + + VMRegPair regs; + BasicType sig_bt = T_OBJECT; -+ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1); ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); + receiver_opr = as_oop_opr(regs.first()->as_Register()); + + for (i = 0; i < nof_caller_save_fpu_regs; i++) { @@ -5260,7 +4926,7 @@ index 00000000000..172031941b2 +} diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp new file mode 100644 -index 00000000000..01281f5c9e1 +index 0000000000..01281f5c9e --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp @@ -0,0 +1,148 @@ @@ -5414,10 +5080,10 @@ index 00000000000..01281f5c9e1 +#endif // CPU_RISCV_C1_FRAMEMAP_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp new file mode 100644 -index 00000000000..4c1c13dc290 +index 0000000000..2a99d49c94 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -@@ -0,0 +1,281 @@ +@@ -0,0 +1,285 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. @@ -5610,7 +5276,7 @@ index 00000000000..4c1c13dc290 + code == lir_add ? __ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c); + break; + case lir_div: -+ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move lreg_lo to dreg if divisor is 1 + __ mv(dreg, lreg_lo); @@ -5628,7 +5294,7 @@ index 00000000000..4c1c13dc290 + } + break; + case lir_rem: -+ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move 0 to dreg if divisor is 1 + __ mv(dreg, zr); @@ -5658,7 +5324,9 @@ index 00000000000..4c1c13dc290 + switch (code) { + case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + default: + ShouldNotReachHere(); @@ -5671,7 +5339,9 @@ index 00000000000..4c1c13dc290 + switch (code) { + case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + default: + ShouldNotReachHere(); @@ -5701,7 +5371,7 @@ index 00000000000..4c1c13dc290 +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp new file mode 100644 -index 
00000000000..ab0a9963fc1 +index 0000000000..ab0a9963fc --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp @@ -0,0 +1,37 @@ @@ -5744,7 +5414,7 @@ index 00000000000..ab0a9963fc1 +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp new file mode 100644 -index 00000000000..b7f53e395f3 +index 0000000000..b7f53e395f --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp @@ -0,0 +1,388 @@ @@ -6138,7 +5808,7 @@ index 00000000000..b7f53e395f3 +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp new file mode 100644 -index 00000000000..06a0f248ca6 +index 0000000000..06a0f248ca --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp @@ -0,0 +1,52 @@ @@ -6196,10 +5866,10 @@ index 00000000000..06a0f248ca6 +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp new file mode 100644 -index 00000000000..742c2126e60 +index 0000000000..1e482d7cc2 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -0,0 +1,2267 @@ +@@ -0,0 +1,2268 @@ +/* + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. @@ -6243,7 +5913,6 @@ index 00000000000..742c2126e60 +#include "oops/objArrayKlass.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" -+#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + +#ifndef PRODUCT @@ -6293,18 +5962,6 @@ index 00000000000..742c2126e60 + +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } + -+void LIR_Assembler::clinit_barrier(ciMethod* method) { -+ assert(VM_Version::supports_fast_class_init_checks(), "sanity"); -+ assert(!method->holder()->is_not_initialized(), "initialization should have been started"); -+ -+ Label L_skip_barrier; -+ -+ __ mov_metadata(t1, method->holder()->constant_encoding()); -+ __ clinit_barrier(t1, t0, &L_skip_barrier /* L_fast_path */); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -+ __ bind(L_skip_barrier); -+} -+ +LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; +} @@ -6569,11 +6226,7 @@ index 00000000000..742c2126e60 + if (method()->is_synchronized()) { + monitor_address(0, FrameMap::r10_opr); + stub = new MonitorExitStub(FrameMap::r10_opr, true, 0); -+ if (UseHeavyMonitors) { -+ __ j(*stub->entry()); -+ } else { -+ __ unlock_object(x15, x14, x10, *stub->entry()); -+ } ++ __ unlock_object(x15, x14, x10, *stub->entry()); + __ bind(*stub->continuation()); + } + @@ -6626,7 +6279,7 @@ index 00000000000..742c2126e60 + return offset; +} + -+void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { ++void LIR_Assembler::return_op(LIR_Opr result) { + assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == x10, "word returns are in x10"); + + // Pop the stack before the safepoint code @@ -6636,18 +6289,20 @@ index 00000000000..742c2126e60 + __ reserved_stack_check(); + } + -+ code_stub->set_safepoint_offset(__ offset()); -+ __ relocate(relocInfo::poll_return_type); -+ __ safepoint_poll(*code_stub->entry(), true /* at_return */, false /* acquire */, true /* in_nmethod */); ++ address 
polling_page(os::get_polling_page()); ++ __ read_polling_page(t0, polling_page, relocInfo::poll_return_type); + __ ret(); +} + +int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { ++ address polling_page(os::get_polling_page()); + guarantee(info != NULL, "Shouldn't be NULL"); -+ __ get_polling_page(t0, relocInfo::poll_type); ++ assert(os::is_poll_address(polling_page), "should be"); ++ int32_t offset = 0; ++ __ get_polling_page(t0, polling_page, offset, relocInfo::poll_type); + add_debug_info_for_branch(info); // This isn't just debug info: + // it's the oop map -+ __ read_polling_page(t0, 0, relocInfo::poll_type); ++ __ read_polling_page(t0, offset, relocInfo::poll_type); + return __ offset(); +} + @@ -6878,7 +6533,7 @@ index 00000000000..742c2126e60 + } +} + -+void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide) { ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { + LIR_Address* to_addr = dest->as_address_ptr(); + // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src + Register compressed_src = t1; @@ -7000,7 +6655,7 @@ index 00000000000..742c2126e60 + reg2stack(temp, dest, dest->type(), false); +} + -+void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide) { ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { + assert(src->is_address(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + @@ -7045,7 +6700,14 @@ index 00000000000..742c2126e60 + __ ld(dest->as_register(), as_Address(from_addr)); + break; + case T_ADDRESS: -+ __ ld(dest->as_register(), as_Address(from_addr)); ++ // FIXME: OMG this is a horrible kludge. Any offset from an ++ // address that matches klass_offset_in_bytes() will be loaded ++ // as a word, not a long. 
++ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ __ lwu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld(dest->as_register(), as_Address(from_addr)); ++ } + break; + case T_INT: + __ lw(dest->as_register(), as_Address(from_addr)); @@ -7073,10 +6735,10 @@ index 00000000000..742c2126e60 + if (UseCompressedOops && !wide) { + __ decode_heap_oop(dest->as_register()); + } -+ -+ if (!UseZGC) { -+ // Load barrier has not yet been applied, so ZGC can't verify the oop here -+ __ verify_oop(dest->as_register()); ++ __ verify_oop(dest->as_register()); ++ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ if (UseCompressedClassPointers) { ++ __ decode_klass_not_null(dest->as_register()); + } + } +} @@ -7119,11 +6781,13 @@ index 00000000000..742c2126e60 + Label done; + move_op(opr2, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack ++ false, // unaligned + false); // wide + __ j(done); + __ bind(label); + move_op(opr1, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack ++ false, // unaligned + false); // wide + __ bind(done); +} @@ -7470,7 +7134,7 @@ index 00000000000..742c2126e60 + assert(op->addr()->is_address(), "what else?"); + LIR_Address* addr_ptr = op->addr()->as_address_ptr(); + assert(addr_ptr->disp() == 0, "need 0 disp"); -+ assert(addr_ptr->index() == LIR_Opr::illegalOpr(), "need 0 index"); ++ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); + addr = as_reg(addr_ptr->base()); + } + Register newval = as_reg(op->new_value()); @@ -7586,6 +7250,11 @@ index 00000000000..742c2126e60 + add_call_info(code_offset(), op->info()); +} + ++/* Currently, vtable-dispatch is only enabled for sparc platforms */ ++void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ++ ShouldNotReachHere(); ++} ++ +void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); + assert((__ offset() % 4) == 0, "bad alignment"); @@ -7711,12 +7380,16 @@ index 00000000000..742c2126e60 + Register obj = op->obj_opr()->as_register(); // may not be an oop + Register hdr = op->hdr_opr()->as_register(); + Register lock = op->lock_opr()->as_register(); -+ if (UseHeavyMonitors) { ++ if (!UseFastLocking) { + __ j(*op->stub()->entry()); + } else if (op->code() == lir_lock) { ++ Register scratch = noreg; ++ if (UseBiasedLocking) { ++ scratch = op->scratch_opr()->as_register(); ++ } + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + // add debug info for NullPointerException only if one is possible -+ int null_check_offset = __ lock_object(hdr, obj, lock, *op->stub()->entry()); ++ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); + if (op->info() != NULL) { + add_debug_info_for_null_check(null_check_offset, op->info()); + } @@ -7729,23 +7402,6 @@ index 00000000000..742c2126e60 + __ bind(*op->stub()->continuation()); +} + -+void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { -+ Register obj = op->obj()->as_pointer_register(); -+ Register result = op->result_opr()->as_pointer_register(); -+ -+ CodeEmitInfo* info = op->info(); -+ if (info != NULL) { -+ add_debug_info_for_null_check_here(info); -+ } -+ -+ if (UseCompressedClassPointers) { -+ __ lwu(result, Address(obj, oopDesc::klass_offset_in_bytes())); -+ __ decode_klass_not_null(result); -+ } else { -+ __ ld(result, Address(obj, oopDesc::klass_offset_in_bytes())); -+ } -+} -+ +void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { 
+ ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); @@ -8016,11 +7672,14 @@ index 00000000000..742c2126e60 + + +void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { -+ if (patch_code != lir_patch_none) { ++#if INCLUDE_SHENANDOAHGC ++ if (UseShenandoahGC && patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } ++#endif + ++ assert(patch_code == lir_patch_none, "Patch code not supported"); + LIR_Address* adr = addr->as_address_ptr(); + Register dst = dest->as_register_lo(); + @@ -8063,7 +7722,7 @@ index 00000000000..742c2126e60 + +void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { + if (dest->is_address() || src->is_address()) { -+ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /* wide */ false); ++ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /*unaligned*/ false, /* wide */ false); + } else { + ShouldNotReachHere(); + } @@ -8223,6 +7882,18 @@ index 00000000000..742c2126e60 + } +} + ++void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::reset_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::fpop() { Unimplemented(); } ++ ++void LIR_Assembler::fxch(int i) { Unimplemented(); } ++ ++void LIR_Assembler::fld(int i) { Unimplemented(); } ++ ++void LIR_Assembler::ffree(int i) { Unimplemented(); } ++ +void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, + Assembler::rl /* release */, t0, true /* result as bool */); @@ -8469,10 +8140,10 @@ index 00000000000..742c2126e60 +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp new file mode 100644 -index 00000000000..051328c3a8a +index 0000000000..5c81f1c704 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp -@@ -0,0 +1,132 @@ +@@ -0,0 +1,133 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. @@ -8548,6 +8219,7 @@ index 00000000000..051328c3a8a + // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) + _call_stub_size = 14 * NativeInstruction::instruction_size + + (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), ++ _call_aot_stub_size = 0, + // See emit_exception_handler for detail + // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) + _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller @@ -8607,10 +8279,10 @@ index 00000000000..051328c3a8a +#endif // CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp new file mode 100644 -index 00000000000..e126f148cdf +index 0000000000..c41819fc2a --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -0,0 +1,1075 @@ +@@ -0,0 +1,1094 @@ +/* + * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
@@ -8651,7 +8323,6 @@ index 00000000000..e126f148cdf +#include "ci/ciTypeArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" -+#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + +#ifdef ASSERT @@ -8819,6 +8490,7 @@ index 00000000000..e126f148cdf + break; + default: + ShouldNotReachHere(); ++ r = NULL; + } + return r; +} @@ -8890,6 +8562,11 @@ index 00000000000..e126f148cdf + + // "lock" stores the address of the monitor stack slot, so this is not an oop + LIR_Opr lock = new_register(T_INT); ++ // Need a scratch register for biased locking ++ LIR_Opr scratch = LIR_OprFact::illegalOpr; ++ if (UseBiasedLocking) { ++ scratch = new_register(T_INT); ++ } + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { @@ -8898,7 +8575,7 @@ index 00000000000..e126f148cdf + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), true); -+ monitor_enter(obj.result(), lock, syncTempOpr(), LIR_OprFact::illegalOpr, ++ monitor_enter(obj.result(), lock, syncTempOpr(), scratch, + x->monitor_no(), info_for_exception, info); +} + @@ -8968,7 +8645,12 @@ index 00000000000..e126f148cdf + right.load_item(); + + LIR_Opr reg = rlock(x); -+ arithmetic_op_fpu(x->op(), reg, left.result(), right.result()); ++ LIR_Opr tmp = LIR_OprFact::illegalOpr; ++ if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) { ++ tmp = new_register(T_DOUBLE); ++ } ++ ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); + + set_result(x, round_item(reg)); +} @@ -8990,7 +8672,7 @@ index 00000000000..e126f148cdf + // no need to do div-by-zero check if the divisor is a non-zero constant + if (c != 0) { need_zero_check = false; } + // do not load right if the divisor is a power-of-2 constant -+ if (c > 0 && is_power_of_2(c)) { ++ if (c > 0 && is_power_of_2_long(c)) { + right.dont_load_item(); + } else { + right.load_item(); @@ -9001,7 +8683,7 @@ index 00000000000..e126f148cdf + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); -+ __ branch(lir_cond_equal, new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info)); + } + + rlock_result(x); @@ -9075,7 +8757,7 @@ index 00000000000..e126f148cdf + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); -+ __ branch(lir_cond_equal, new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info)); + } + + LIR_Opr ill = LIR_OprFact::illegalOpr; @@ -9254,16 +8936,14 @@ index 00000000000..e126f148cdf + do_LibmIntrinsic(x); + break; + case vmIntrinsics::_dabs: // fall through -+ case vmIntrinsics::_dsqrt: // fall through -+ case vmIntrinsics::_dsqrt_strict: { ++ case vmIntrinsics::_dsqrt: { + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + + switch (x->id()) { -+ case vmIntrinsics::_dsqrt: // fall through -+ case vmIntrinsics::_dsqrt_strict: { ++ case vmIntrinsics::_dsqrt: { + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } @@ -9284,19 +8964,30 @@ index 00000000000..e126f148cdf +void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { + LIRItem value(x->argument_at(0), this); + value.set_destroys_register(); ++ + LIR_Opr 
calc_result = rlock_result(x); + LIR_Opr result_reg = result_register_for(x->type()); ++ + CallingConvention* cc = NULL; -+ BasicTypeList signature(1); -+ signature.append(T_DOUBLE); -+ if (x->id() == vmIntrinsics::_dpow) { signature.append(T_DOUBLE); } -+ cc = frame_map()->c_calling_convention(&signature); -+ value.load_item_force(cc->at(0)); ++ + if (x->id() == vmIntrinsics::_dpow) { + LIRItem value1(x->argument_at(1), this); ++ + value1.set_destroys_register(); ++ ++ BasicTypeList signature(2); ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); + value1.load_item_force(cc->at(1)); ++ } else { ++ BasicTypeList signature(1); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); + } ++ + switch (x->id()) { + case vmIntrinsics::_dexp: + if (StubRoutines::dexp() != NULL) { __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); } @@ -9663,9 +9354,9 @@ index 00000000000..e126f148cdf + profile_branch(x, cond); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { -+ __ branch(lir_cond(cond), x->tsux(), x->usux()); ++ __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); + } else { -+ __ branch(lir_cond(cond), x->tsux()); ++ __ branch(lir_cond(cond), right->type(), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); @@ -9688,7 +9379,7 @@ index 00000000000..e126f148cdf +} diff --git a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp new file mode 100644 -index 00000000000..5f1c394ab3d +index 0000000000..0317ed9003 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp @@ -0,0 +1,55 @@ @@ -9721,22 +9412,22 @@ index 00000000000..5f1c394ab3d +#include "asm/register.hpp" +#include "c1/c1_LIR.hpp" + -+FloatRegister LIR_Opr::as_float_reg() const { ++FloatRegister LIR_OprDesc::as_float_reg() const { + return as_FloatRegister(fpu_regnr()); +} + -+FloatRegister LIR_Opr::as_double_reg() const { ++FloatRegister LIR_OprDesc::as_double_reg() const { + return as_FloatRegister(fpu_regnrLo()); +} + +// Reg2 unused. 
+LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { + assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); -+ return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | -+ (reg1 << LIR_Opr::reg2_shift) | -+ LIR_Opr::double_type | -+ LIR_Opr::fpu_register | -+ LIR_Opr::double_size); ++ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | ++ (reg1 << LIR_OprDesc::reg2_shift) | ++ LIR_OprDesc::double_type | ++ LIR_OprDesc::fpu_register | ++ LIR_OprDesc::double_size); +} + +#ifndef PRODUCT @@ -9749,7 +9440,7 @@ index 00000000000..5f1c394ab3d +#endif // PRODUCT diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp new file mode 100644 -index 00000000000..78a61128bdd +index 0000000000..78a61128bd --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp @@ -0,0 +1,33 @@ @@ -9788,7 +9479,7 @@ index 00000000000..78a61128bdd +} diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp new file mode 100644 -index 00000000000..d7ca7b0fd05 +index 0000000000..d7ca7b0fd0 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp @@ -0,0 +1,83 @@ @@ -9877,10 +9568,10 @@ index 00000000000..d7ca7b0fd05 +#endif // CPU_RISCV_C1_LINEARSCAN_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp new file mode 100644 -index 00000000000..6f656c8c533 +index 0000000000..99d981f97f --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -0,0 +1,432 @@ +@@ -0,0 +1,443 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. @@ -9916,8 +9607,8 @@ index 00000000000..6f656c8c533 +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/arrayOop.hpp" -+#include "oops/markWord.hpp" +#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" @@ -9933,7 +9624,7 @@ index 00000000000..6f656c8c533 + } +} + -+int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { + const int aligned_mask = BytesPerWord - 1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); @@ -9945,19 +9636,17 @@ index 00000000000..6f656c8c533 + // save object being locked into the BasicObjectLock + sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + -+ null_check_offset = offset(); -+ -+ if (DiagnoseSyncOnValueBasedClasses != 0) { -+ load_klass(hdr, obj); -+ lwu(hdr, Address(hdr, Klass::access_flags_offset())); -+ andi(t0, hdr, JVM_ACC_IS_VALUE_BASED_CLASS); -+ bnez(t0, slow_case, true /* is_far */); ++ if (UseBiasedLocking) { ++ assert(scratch != noreg, "should have scratch register at this point"); ++ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); ++ } else { ++ null_check_offset = offset(); + } + + // Load object header + ld(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked -+ ori(hdr, hdr, markWord::unlocked_value); ++ ori(hdr, hdr, markOopDesc::unlocked_value); + // save unlocked object header into the displaced header location on the stack + 
sd(hdr, Address(disp_hdr, 0)); + // test if object header is still the same (i.e. unlocked), and if so, store the @@ -9988,6 +9677,10 @@ index 00000000000..6f656c8c533 + // otherwise we don't care about the result and handle locking via runtime call + bnez(hdr, slow_case, /* is_far */ true); + bind(done); ++ if (PrintBiasedLockingStatistics) { ++ la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); ++ add_memory_int32(Address(t1, 0), 1); ++ } + return null_check_offset; +} + @@ -9997,13 +9690,21 @@ index 00000000000..6f656c8c533 + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + ++ if (UseBiasedLocking) { ++ // load object ++ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ biased_locking_exit(obj, hdr, done); ++ } ++ + // load displaced header + ld(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + // if we had recursive locking, we are done + beqz(hdr, done); -+ // load object -+ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ if (!UseBiasedLocking) { ++ // load object ++ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ } + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to @@ -10030,8 +9731,13 @@ index 00000000000..6f656c8c533 + +void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { + assert_different_registers(obj, klass, len); -+ // This assumes that all prototype bits fitr in an int32_t -+ mv(tmp1, (int32_t)(intptr_t)markWord::prototype().value()); ++ if (UseBiasedLocking && !len->is_valid()) { ++ assert_different_registers(obj, klass, len, tmp1, tmp2); ++ ld(tmp1, Address(klass, Klass::prototype_header_offset())); ++ } else { ++ // This assumes that all prototype bits fitr in an int32_t ++ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); ++ } + sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) { // Take care not to kill klass @@ -10185,15 +9891,17 @@ index 00000000000..6f656c8c533 +} + +void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a J, JAL or NOP. ++ // Make it a NOP. ++ nop(); ++ + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); + // Make sure there is enough stack space for this method's activation. + // Note that we do this before creating a frame. + generate_stack_overflow_check(bang_size_in_bytes); + MacroAssembler::build_frame(framesize); -+ -+ // Insert nmethod entry barrier into frame. -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->nmethod_entry_barrier(this); +} + +void C1_MacroAssembler::remove_frame(int framesize) { @@ -10201,13 +9909,7 @@ index 00000000000..6f656c8c533 +} + + -+void C1_MacroAssembler::verified_entry(bool breakAtEntry) { -+ // If we have to make this method not-entrant we'll overwrite its -+ // first instruction with a jump. For this action to be legal we -+ // must ensure that this first instruction is a J, JAL or NOP. -+ // Make it a NOP. 
-+ -+ nop(); ++void C1_MacroAssembler::verified_entry() { +} + +void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { @@ -10315,10 +10017,10 @@ index 00000000000..6f656c8c533 +} diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp new file mode 100644 -index 00000000000..dfd3c17d7c7 +index 0000000000..1950cee5dd --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp -@@ -0,0 +1,120 @@ +@@ -0,0 +1,121 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. @@ -10380,8 +10082,9 @@ index 00000000000..dfd3c17d7c7 + // hdr : must be x10, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved ++ // scratch : scratch register, contents destroyed + // returns code offset at which to add null check debug information -+ int lock_object (Register swap, Register obj, Register disp_hdr, Label& slow_case); ++ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + + // unlocking + // hdr : contents destroyed @@ -10441,10 +10144,10 @@ index 00000000000..dfd3c17d7c7 +#endif // CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp new file mode 100644 -index 00000000000..f523c9ed50a +index 0000000000..329df2e1ca --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -0,0 +1,1172 @@ +@@ -0,0 +1,1210 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
@@ -10493,7 +10196,6 @@ index 00000000000..f523c9ed50a +#include "runtime/stubRoutines.hpp" +#include "runtime/vframe.hpp" +#include "runtime/vframeArray.hpp" -+#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + + @@ -10614,19 +10316,14 @@ index 00000000000..f523c9ed50a + return call_RT(oop_result, metadata_result, entry, arg_num); +} + -+enum return_state_t { -+ does_not_return, requires_return -+}; -+ +// Implementation of StubFrame + +class StubFrame: public StackObj { + private: + StubAssembler* _sasm; -+ bool _return_state; + + public: -+ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state=requires_return); ++ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); + void load_argument(int offset_in_words, Register reg); + + ~StubFrame(); @@ -10644,9 +10341,8 @@ index 00000000000..f523c9ed50a + +#define __ _sasm-> + -+StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state) { ++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { + _sasm = sasm; -+ _return_state = return_state; + __ prologue(name, must_gc_arguments); +} + @@ -10658,11 +10354,7 @@ index 00000000000..f523c9ed50a + + +StubFrame::~StubFrame() { -+ if (_return_state == requires_return) { -+ __ epilogue(); -+ } else { -+ __ should_not_reach_here(); -+ } ++ __ epilogue(); + _sasm = NULL; +} + @@ -10825,6 +10517,7 @@ index 00000000000..f523c9ed50a + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, oop_map); + ++ __ should_not_reach_here(); + return oop_maps; +} + @@ -10872,7 +10565,9 @@ index 00000000000..f523c9ed50a + sasm->set_frame_size(frame_size); + break; + } -+ default: ShouldNotReachHere(); ++ default: ++ __ should_not_reach_here(); ++ break; + } + + // verify that only x10 and x13 are valid at this time @@ -10928,6 +10623,9 @@ index 00000000000..f523c9ed50a + restore_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: ++ // Pop the return address. ++ __ leave(); ++ __ ret(); // jump to exception handler + break; + default: ShouldNotReachHere(); + } @@ -11032,37 +10730,80 @@ index 00000000000..f523c9ed50a +#endif + __ reset_last_Java_frame(true); + ++ // check for pending exceptions ++ { Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, L); ++ // exception pending => remove activation and forward to exception handler ++ ++ { Label L1; ++ __ bnez(x10, L1); // have we deoptimized? ++ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); ++ __ bind(L1); ++ } ++ ++ // the deopt blob expects exceptions in the special fields of ++ // JavaThread, so copy and clear pending exception. 
++ ++ // load and clear pending exception ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); ++ ++ // check that there is really a valid exception ++ __ verify_not_null_oop(x10); ++ ++ // load throwing pc: this is the return address of the stub ++ __ ld(x13, Address(fp, wordSize)); ++ +#ifdef ASSERT -+ // Check that fields in JavaThread for exception oop and issuing pc are empty -+ Label oop_empty; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, oop_empty); -+ __ stop("exception oop must be empty"); -+ __ bind(oop_empty); ++ // Check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); + -+ Label pc_empty; -+ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); -+ __ beqz(t0, pc_empty); -+ __ stop("exception pc must be empty"); -+ __ bind(pc_empty); ++ Label pc_empty; ++ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); ++ __ beqz(t0, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); +#endif + -+ // Runtime will return true if the nmethod has been deoptimized, this is the -+ // expected scenario and anything else is an error. Note that we maintain a -+ // check on the result purely as a defensive measure. -+ Label no_deopt; -+ __ beqz(x10, no_deopt); // Have we deoptimized? ++ // store exception oop and throwing pc to JavaThread ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ ++ restore_live_registers(sasm); ++ ++ __ leave(); ++ ++ // Forward the exception directly to deopt blob. We can blow no ++ // registers and must leave throwing pc on the stack. A patch may ++ // have values live in registers so the entry point with the ++ // exception in tls. ++ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); ++ ++ __ bind(L); ++ } + -+ // Perform a re-execute. The proper return address is already on the stack, -+ // we just need to restore registers, pop all of our frames but the return -+ // address and jump to the deopt blob. ++ // Runtime will return true if the nmethod has been deoptimized during ++ // the patching process. In that case we must do a deopt reexecute instead. ++ Label cont; ++ ++ __ beqz(x10, cont); // have we deoptimized? ++ ++ // Will reexecute. 
Proper return address is already on the stack we just restore ++ // registers, pop all of our frame but the return address and jump to the deopt blob + + restore_live_registers(sasm); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + -+ __ bind(no_deopt); -+ __ stop("deopt not performed"); ++ __ bind(cont); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ ret(); + + return oop_maps; +} @@ -11088,13 +10829,13 @@ index 00000000000..f523c9ed50a + + case throw_div0_exception_id: + { -+ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: -+ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); ++ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; @@ -11373,14 +11114,14 @@ index 00000000000..f523c9ed50a + + case throw_class_cast_exception_id: + { -+ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { -+ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, + CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } @@ -11474,7 +11215,7 @@ index 00000000000..f523c9ed50a + + case deoptimize_id: + { -+ StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments); + OopMap* oop_map = save_live_registers(sasm); + assert_cond(oop_map != NULL); + f.load_argument(0, c_rarg1); @@ -11493,7 +11234,7 @@ index 00000000000..f523c9ed50a + + case throw_range_check_failed_id: + { -+ StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; @@ -11509,7 +11250,7 @@ index 00000000000..f523c9ed50a + + case access_field_patching_id: + { -+ StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "access_field_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } @@ -11517,7 +11258,7 @@ index 00000000000..f523c9ed50a + + case load_klass_patching_id: + { -+ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } @@ -11525,7 +11266,7 @@ index 00000000000..f523c9ed50a + + case load_mirror_patching_id: + { -+ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); + // we should set up register map + 
oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } @@ -11533,7 +11274,7 @@ index 00000000000..f523c9ed50a + + case load_appendix_patching_id: + { -+ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } @@ -11556,14 +11297,14 @@ index 00000000000..f523c9ed50a + + case throw_index_exception_id: + { -+ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + case throw_array_store_exception_id: + { -+ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); + // tos + 0: link + // + 1: return address + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); @@ -11572,7 +11313,7 @@ index 00000000000..f523c9ed50a + + case predicate_failed_trap_id: + { -+ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); + + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); @@ -11595,7 +11336,7 @@ index 00000000000..f523c9ed50a + StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); + save_live_registers(sasm); + -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), c_rarg0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), c_rarg0); + + restore_live_registers(sasm); + } @@ -11603,7 +11344,7 @@ index 00000000000..f523c9ed50a + + default: + { -+ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); + __ li(x10, (int) id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10); + __ should_not_reach_here(); @@ -11619,10 +11360,10 @@ index 00000000000..f523c9ed50a +const char *Runtime1::pd_name_for_address(address entry) { Unimplemented(); return 0; } diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp new file mode 100644 -index 00000000000..fe46f7b21c8 +index 0000000000..9316d4be02 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -0,0 +1,65 @@ +@@ -0,0 +1,71 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. @@ -11657,8 +11398,10 @@ index 00000000000..fe46f7b21c8 +// Sets the default values for platform dependent flags used by the client compiler. 
+// (see c1_globals.hpp) + -+#ifndef COMPILER2 ++#ifndef TIERED +define_pd_global(bool, BackgroundCompilation, true ); ++define_pd_global(bool, UseTLAB, true ); ++define_pd_global(bool, ResizeTLAB, true ); +define_pd_global(bool, InlineIntrinsics, true ); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, false); @@ -11667,6 +11410,7 @@ index 00000000000..fe46f7b21c8 +define_pd_global(intx, CompileThreshold, 1500 ); + +define_pd_global(intx, OnStackReplacePercentage, 933 ); ++define_pd_global(intx, FreqInlineSize, 325 ); +define_pd_global(intx, NewSizeThreadIncrease, 4*K ); +define_pd_global(intx, InitialCodeCacheSize, 160*K); +define_pd_global(intx, ReservedCodeCacheSize, 32*M ); @@ -11677,25 +11421,28 @@ index 00000000000..fe46f7b21c8 +define_pd_global(intx, CodeCacheExpansionSize, 32*K ); +define_pd_global(uintx, CodeCacheMinBlockLength, 1); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++define_pd_global(uintx, MetaspaceSize, 12*M ); +define_pd_global(bool, NeverActAsServerClassMachine, true ); -+define_pd_global(uint64_t, MaxRAM, 1ULL*G); ++define_pd_global(uint64_t, MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); -+#endif // !COMPILER2 ++#endif // !TIERED +define_pd_global(bool, UseTypeProfile, false); ++define_pd_global(bool, RoundFPResults, true ); + ++define_pd_global(bool, LIRFillDelaySlots, false); +define_pd_global(bool, OptimizeSinglePrecision, true ); +define_pd_global(bool, CSEArrayLength, false); +define_pd_global(bool, TwoOperandLIRForm, false); + +#endif // CPU_RISCV_C1_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp new file mode 100644 -index 00000000000..27770dc17aa +index 0000000000..3da1f1c6d8 --- /dev/null -+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp -@@ -0,0 +1,1646 @@ ++++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +@@ -0,0 +1,90 @@ +/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -11719,1635 +11466,1167 @@ index 00000000000..27770dc17aa + * + */ + -+#include "precompiled.hpp" -+#include "asm/assembler.hpp" -+#include "asm/assembler.inline.hpp" -+#include "opto/c2_MacroAssembler.hpp" -+#include "opto/intrinsicnode.hpp" -+#include "opto/subnode.hpp" -+#include "runtime/stubRoutines.hpp" ++#ifndef CPU_RISCV_C2_GLOBALS_RISCV_HPP ++#define CPU_RISCV_C2_GLOBALS_RISCV_HPP + -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#define STOP(error) stop(error) -+#else -+#define BLOCK_COMMENT(str) block_comment(str) -+#define STOP(error) block_comment(error); stop(error) -+#endif ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" + -+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. 
+ -+// short string -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void C2_MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL) -+{ -+ Register ch1 = t0; -+ Register index = t1; ++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++define_pd_global(bool, ProfileInterpreter, true); ++define_pd_global(bool, TieredCompilation, trueInTiered); ++define_pd_global(intx, CompileThreshold, 10000); + -+ BLOCK_COMMENT("string_indexof_char_short {"); ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 0); ++define_pd_global(intx, FLOATPRESSURE, 32); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 24); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); + -+ Label LOOP, LOOP1, LOOP4, LOOP8; -+ Label MATCH, MATCH1, MATCH2, MATCH3, -+ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); + -+ mv(result, -1); -+ mv(index, zr); ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. ++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, true); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); + -+ bind(LOOP); -+ addi(t0, index, 8); -+ ble(t0, cnt1, LOOP8); -+ addi(t0, index, 4); -+ ble(t0, cnt1, LOOP4); -+ j(LOOP1); ++define_pd_global(intx, ReservedCodeCacheSize, 48*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 22*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 6); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + -+ bind(LOOP8); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); -+ beq(ch, ch1, MATCH4); -+ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); -+ beq(ch, ch1, MATCH5); -+ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); -+ beq(ch, ch1, MATCH6); -+ isL ? 
lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); -+ beq(ch, ch1, MATCH7); -+ addi(index, index, 8); -+ addi(str1, str1, isL ? 8 : 16); -+ blt(index, cnt1, LOOP); -+ j(NOMATCH); ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); + -+ bind(LOOP4); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ addi(index, index, 4); -+ addi(str1, str1, isL ? 4 : 8); -+ bge(index, cnt1, NOMATCH); ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); + -+ bind(LOOP1); -+ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); -+ beq(ch, ch1, MATCH); -+ addi(index, index, 1); -+ addi(str1, str1, isL ? 1 : 2); -+ blt(index, cnt1, LOOP1); -+ j(NOMATCH); ++define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. + -+ bind(MATCH1); -+ addi(index, index, 1); -+ j(MATCH); ++#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c2_init_riscv.cpp b/src/hotspot/cpu/riscv/c2_init_riscv.cpp +new file mode 100644 +index 0000000000..cdbd69807b +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ bind(MATCH2); -+ addi(index, index, 2); -+ j(MATCH); ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" + -+ bind(MATCH3); -+ addi(index, index, 3); -+ j(MATCH); ++// processor dependent initialization for riscv + -+ bind(MATCH4); -+ addi(index, index, 4); -+ j(MATCH); ++extern void reg_mask_init(); + -+ bind(MATCH5); -+ addi(index, index, 5); -+ j(MATCH); ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++ reg_mask_init(); ++} +diff --git a/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp +new file mode 100644 +index 0000000000..14a68b4502 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp +@@ -0,0 +1,36 @@ ++/* ++ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ bind(MATCH6); -+ addi(index, index, 6); -+ j(MATCH); ++#ifndef CPU_RISCV_CODEBUFFER_RISCV_HPP ++#define CPU_RISCV_CODEBUFFER_RISCV_HPP + -+ bind(MATCH7); -+ addi(index, index, 7); ++private: ++ void pd_initialize() {} + -+ bind(MATCH); -+ mv(result, index); -+ bind(NOMATCH); -+ BLOCK_COMMENT("} string_indexof_char_short"); -+} ++public: ++ void flush_bundle(bool start_new_bundle) {} + -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL) -+{ -+ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; -+ Register ch1 = t0; -+ Register orig_cnt = t1; -+ Register mask1 = tmp3; -+ Register mask2 = tmp2; -+ Register match_mask = tmp1; -+ Register trailing_char = tmp4; -+ Register unaligned_elems = tmp4; ++#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +new file mode 100644 +index 0000000000..a4de342a93 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +@@ -0,0 +1,149 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ BLOCK_COMMENT("string_indexof_char {"); -+ beqz(cnt1, NOMATCH); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" + -+ addi(t0, cnt1, isL ? -32 : -16); -+ bgtz(t0, DO_LONG); -+ string_indexof_char_short(str1, cnt1, ch, result, isL); -+ j(DONE); ++// ---------------------------------------------------------------------------- + -+ bind(DO_LONG); -+ mv(orig_cnt, cnt1); -+ if (AvoidUnalignedAccesses) { -+ Label ALIGNED; -+ andi(unaligned_elems, str1, 0x7); -+ beqz(unaligned_elems, ALIGNED); -+ sub(unaligned_elems, unaligned_elems, 8); -+ neg(unaligned_elems, unaligned_elems); -+ if (!isL) { -+ srli(unaligned_elems, unaligned_elems, 1); -+ } -+ // do unaligned part per element -+ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); -+ bgez(result, DONE); -+ mv(orig_cnt, cnt1); -+ sub(cnt1, cnt1, unaligned_elems); -+ bind(ALIGNED); -+ } ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ precond(cbuf.stubs()->start() != badAddress); ++ precond(cbuf.stubs()->end() != badAddress); ++ // Stub is fixed up when the corresponding call is converted from ++ // calling compiled code to calling interpreted code. ++ // mv xmethod, 0 ++ // jalr -4 # to self + -+ // duplicate ch -+ if (isL) { -+ slli(ch1, ch, 8); -+ orr(ch, ch1, ch); ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // Get mark within main instrs section. + } -+ slli(ch1, ch, 16); -+ orr(ch, ch1, ch); -+ slli(ch1, ch, 32); -+ orr(ch, ch1, ch); + -+ if (!isL) { -+ slli(cnt1, cnt1, 1); -+ } ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. ++ MacroAssembler _masm(&cbuf); + -+ uint64_t mask0101 = UCONST64(0x0101010101010101); -+ uint64_t mask0001 = UCONST64(0x0001000100010001); -+ mv(mask1, isL ? mask0101 : mask0001); -+ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); -+ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); -+ mv(mask2, isL ? 
mask7f7f : mask7fff); ++ address base = __ start_a_stub(to_interp_stub_size()); ++ int offset = __ offset(); ++ if (base == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ // static stub relocation stores the instruction address of the call ++ __ relocate(static_stub_Relocation::spec(mark)); + -+ bind(CH1_LOOP); -+ ld(ch1, Address(str1)); -+ addi(str1, str1, 8); -+ addi(cnt1, cnt1, -8); -+ compute_match_mask(ch1, ch, match_mask, mask1, mask2); -+ bnez(match_mask, HIT); -+ bgtz(cnt1, CH1_LOOP); -+ j(NOMATCH); ++ __ emit_static_call_stub(); + -+ bind(HIT); -+ ctzc_bit(trailing_char, match_mask, isL, ch1, result); -+ srli(trailing_char, trailing_char, 3); -+ addi(cnt1, cnt1, 8); -+ ble(cnt1, trailing_char, NOMATCH); -+ // match case -+ if (!isL) { -+ srli(cnt1, cnt1, 1); -+ srli(trailing_char, trailing_char, 1); -+ } ++ assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big"); ++ __ end_a_stub(); ++ return base; ++} ++#undef __ + -+ sub(result, orig_cnt, cnt1); -+ add(result, result, trailing_char); -+ j(DONE); ++int CompiledStaticCall::to_interp_stub_size() { ++ // (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr ++ return 12 * NativeInstruction::instruction_size; ++} + -+ bind(NOMATCH); -+ mv(result, -1); ++int CompiledStaticCall::to_trampoline_stub_size() { ++ // Somewhat pessimistically, we count 4 instructions here (although ++ // there are only 3) because we sometimes emit an alignment nop. ++ // Trampoline stubs are always word aligned. ++ return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; ++} + -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof_char"); ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 4; // 3 in emit_to_interp_stub + 1 in emit_call +} + -+typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(false /* is_aot */); ++ guarantee(stub != NULL, "stub not found"); + -+// Search for needle in haystack and return index or -1 -+// x10: result -+// x11: haystack -+// x12: haystack_len -+// x13: needle -+// x14: needle_len -+void C2_MacroAssembler::string_indexof(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae) -+{ -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } + -+ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; ++ // Creation also verifies the object. 
++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); ++#ifndef PRODUCT ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); + -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register nlen_tmp = tmp1; // needle len tmp -+ Register hlen_tmp = tmp2; // haystack len tmp -+ Register result_tmp = tmp4; ++ // read the value once ++ volatile intptr_t data = method_holder->data(); ++ assert(data == 0 || data == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(data == 0 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); ++#endif ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry); ++ ICache::invalidate_range(stub, to_interp_stub_size()); ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} + -+ bool isLL = ae == StrIntrinsicNode::LL; ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); ++ method_holder->set_data(0); ++} + -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT + -+ BLOCK_COMMENT("string_indexof {"); ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ _call->verify_alignment(); + -+ // Note, inline_string_indexOf() generates checks: -+ // if (pattern.count > src.count) return -1; -+ // if (pattern.count == 0) return 0; ++ // Verify stub. ++ address stub = find_stub(false /* is_aot */); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + -+ // We have two strings, a source string in haystack, haystack_len and a pattern string -+ // in needle, needle_len. Find the first occurence of pattern in source or return -1. ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} + -+ // For larger pattern and source we use a simplified Boyer Moore algorithm. -+ // With a small pattern and source we use linear scan. ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp +new file mode 100644 +index 0000000000..05da242e35 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/copy_riscv.hpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. -+ sub(result_tmp, haystack_len, needle_len); -+ // needle_len < 8, use linear scan -+ sub(t0, needle_len, 8); -+ bltz(t0, LINEARSEARCH); -+ // needle_len >= 256, use linear scan -+ sub(t0, needle_len, 256); -+ bgez(t0, LINEARSTUB); -+ // needle_len >= haystack_len/4, use linear scan -+ srli(t0, haystack_len, 2); -+ bge(needle_len, t0, LINEARSTUB); ++#ifndef CPU_RISCV_COPY_RISCV_HPP ++#define CPU_RISCV_COPY_RISCV_HPP + -+ // Boyer-Moore-Horspool introduction: -+ // The Boyer Moore alogorithm is based on the description here:- -+ // -+ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm -+ // -+ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule -+ // and the 'Good Suffix' rule. -+ // -+ // These rules are essentially heuristics for how far we can shift the -+ // pattern along the search string. -+ // -+ // The implementation here uses the 'Bad Character' rule only because of the -+ // complexity of initialisation for the 'Good Suffix' rule. -+ // -+ // This is also known as the Boyer-Moore-Horspool algorithm: -+ // -+ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm -+ // -+ // #define ASIZE 256 -+ // -+ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { -+ // int i, j; -+ // unsigned c; -+ // unsigned char bc[ASIZE]; -+ // -+ // /* Preprocessing */ -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ // -+ // /* Searching */ -+ // j = 0; -+ // while (j <= n - m) { -+ // c = src[i+j]; -+ // if (pattern[m-1] == c) -+ // int k; -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // if (k < 0) return j; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 -+ // // LL case: (c< 256) always true. Remove branch -+ // j += bc[pattern[j+m-1]]; -+ // #endif -+ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF -+ // // UU case: need if (c if not. 
-+ // if (c < ASIZE) -+ // j += bc[pattern[j+m-1]]; -+ // else -+ // j += m -+ // #endif -+ // } -+ // return -1; -+ // } ++// Inline functions for memory copy and fill. + -+ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result -+ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, -+ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) + -+ Register haystack_end = haystack_len; -+ Register skipch = tmp2; ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} + -+ // pattern length is >=8, so, we can read at least 1 register for cases when -+ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for -+ // UL case. We'll re-read last character in inner pre-loop code to have -+ // single outer pre-loop load -+ const int firstStep = isLL ? 7 : 3; ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} + -+ const int ASIZE = 256; -+ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} + -+ sub(sp, sp, ASIZE); ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} + -+ // init BC offset table with default value: needle_len -+ slli(t0, needle_len, 8); -+ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] -+ slli(tmp1, t0, 16); -+ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] -+ slli(tmp1, t0, 32); -+ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} + -+ mv(ch1, sp); // ch1 is t0 -+ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations ++#endif // CPU_RISCV_COPY_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp +new file mode 100644 +index 0000000000..e9ff307b64 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ bind(BM_INIT_LOOP); -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ for (int i = 0; i < 4; i++) { -+ sd(tmp5, Address(ch1, i * wordSize)); -+ } -+ add(ch1, ch1, 32); -+ sub(tmp6, tmp6, 4); -+ bgtz(tmp6, BM_INIT_LOOP); ++#ifndef CPU_RISCV_VM_DEPCHECKER_RISCV_HPP ++#define CPU_RISCV_VM_DEPCHECKER_RISCV_HPP + -+ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern -+ Register orig_haystack = tmp5; -+ mv(orig_haystack, haystack); -+ // result_tmp = tmp4 -+ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); -+ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 -+ mv(tmp3, needle); ++// Nothing to do on riscv + -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ bind(BCLOOP); -+ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); -+ add(tmp3, tmp3, needle_chr_size); -+ if (!needle_isL) { -+ // ae == StrIntrinsicNode::UU -+ mv(tmp6, ASIZE); -+ bgeu(ch1, tmp6, BCSKIP); -+ } -+ add(tmp4, sp, ch1); -+ sb(ch2, Address(tmp4)); // store skip offset to BC offset table ++#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp +new file mode 100644 +index 0000000000..06bca5298c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ bind(BCSKIP); -+ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 -+ bgtz(ch2, BCLOOP); ++#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP ++#define CPU_RISCV_DISASSEMBLER_RISCV_HPP + -+ // tmp6: pattern end, address after needle -+ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); -+ if (needle_isL == haystack_isL) { -+ // load last 8 bytes (8LL/4UU symbols) -+ ld(tmp6, Address(tmp6, -wordSize)); -+ } else { -+ // UL: from UTF-16(source) search Latin1(pattern) -+ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) -+ // convert Latin1 to UTF. 
eg: 0x0000abcd -> 0x0a0b0c0d -+ // We'll have to wait until load completed, but it's still faster than per-character loads+checks -+ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a -+ slli(ch2, tmp6, XLEN - 24); -+ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b -+ slli(ch1, tmp6, XLEN - 16); -+ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c -+ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d -+ slli(ch2, ch2, 16); -+ orr(ch2, ch2, ch1); // 0x00000b0c -+ slli(result, tmp3, 48); // use result as temp register -+ orr(tmp6, tmp6, result); // 0x0a00000d -+ slli(result, ch2, 16); -+ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d -+ } ++static int pd_instruction_alignment() { ++ return 1; ++} + -+ // i = m - 1; -+ // skipch = j + i; -+ // if (skipch == pattern[m - 1] -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // else -+ // move j with bad char offset table -+ bind(BMLOOPSTR2); -+ // compare pattern to source string backward -+ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); -+ (this->*haystack_load_1chr)(skipch, Address(result), noreg); -+ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 -+ if (needle_isL == haystack_isL) { -+ // re-init tmp3. It's for free because it's executed in parallel with -+ // load above. Alternative is to initialize it before loop, but it'll -+ // affect performance on in-order systems with 2 or more ld/st pipelines -+ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] -+ } -+ if (!isLL) { // UU/UL case -+ slli(ch2, nlen_tmp, 1); // offsets in bytes -+ } -+ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char -+ add(result, haystack, isLL ? nlen_tmp : ch2); -+ ld(ch2, Address(result)); // load 8 bytes from source string -+ mv(ch1, tmp6); -+ if (isLL) { -+ j(BMLOOPSTR1_AFTER_LOAD); -+ } else { -+ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 -+ j(BMLOOPSTR1_CMP); -+ } ++static const char* pd_cpu_opts() { ++ return ""; ++} + -+ bind(BMLOOPSTR1); -+ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +new file mode 100644 +index 0000000000..d4fcbdcbbd +--- /dev/null ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -0,0 +1,694 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ bind(BMLOOPSTR1_AFTER_LOAD); -+ sub(nlen_tmp, nlen_tmp, 1); -+ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); ++#include "precompiled.hpp" ++#include "compiler/oopMap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/os.inline.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_riscv.inline.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" ++#include "runtime/vframeArray.hpp" ++#endif + -+ bind(BMLOOPSTR1_CMP); -+ beq(ch1, ch2, BMLOOPSTR1); ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif + -+ bind(BMSKIP); -+ if (!isLL) { -+ // if we've met UTF symbol while searching Latin1 pattern, then we can -+ // skip needle_len symbols -+ if (needle_isL != haystack_isL) { -+ mv(result_tmp, needle_len); -+ } else { -+ mv(result_tmp, 1); -+ } -+ mv(t0, ASIZE); -+ bgeu(skipch, t0, BMADV); -+ } -+ add(result_tmp, sp, skipch); -+ lbu(result_tmp, Address(result_tmp)); // load skip offset + -+ bind(BMADV); -+ sub(nlen_tmp, needle_len, 1); -+ // move haystack after bad char skip offset -+ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); -+ ble(haystack, haystack_end, BMLOOPSTR2); -+ add(sp, sp, ASIZE); -+ j(NOMATCH); ++// Profiling/safepoint support + -+ bind(BMLOOPSTR1_LASTCMP); -+ bne(ch1, ch2, BMSKIP); ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; + -+ bind(BMMATCH); -+ sub(result, haystack, orig_haystack); -+ if (!haystack_isL) { -+ srli(result, result, 1); ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? 
++ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { ++ return false; + } -+ add(sp, sp, ASIZE); -+ j(DONE); + -+ bind(LINEARSTUB); -+ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm -+ bltz(t0, LINEARSEARCH); -+ mv(result, zr); -+ RuntimeAddress stub = NULL; -+ if (isLL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); -+ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); -+ } else if (needle_isL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); -+ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); -+ } else { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); -+ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); ++ // When we are running interpreted code the machine stack pointer, SP, is ++ // set low enough so that the Java expression stack can grow and shrink ++ // without ever exceeding the machine stack bounds. So, ESP >= SP. ++ ++ // When we call out of an interpreted method, SP is incremented so that ++ // the space between SP and ESP is removed. The SP saved in the callee's ++ // frame is the SP *before* this increment. So, when we walk a stack of ++ // interpreter frames the sender's SP saved in a frame might be less than ++ // the SP at the point of call. ++ ++ // So unextended sp must be within the stack but we need not to check ++ // that unextended sp >= sp ++ ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()); ++ ++ if (!unextended_sp_safe) { ++ return false; + } -+ trampoline_call(stub); -+ j(DONE); + -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); + -+ bind(LINEARSEARCH); -+ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); ++ // We know sp/unextended_sp are safe only fp is questionable here + -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof"); -+} ++ // If the current frame is known to the code cache then we can attempt to ++ // to construct the sender and do some validation of it. 
This goes a long way ++ // toward eliminating issues when we get in frame construction code + -+// string_indexof -+// result: x10 -+// src: x11 -+// src_count: x12 -+// pattern: x13 -+// pattern_count: x14 or 1/2/3/4 -+void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae) -+{ -+ // Note: -+ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant -+ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 -+ assert(needle_con_cnt <= 4, "Invalid needle constant count"); -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ if (_cb != NULL) { + -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register hlen_neg = haystack_len, nlen_neg = needle_len; -+ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. + -+ bool isLL = ae == StrIntrinsicNode::LL; ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } + -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } + -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; -+ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. ++ return fp_safe && is_entry_frame_valid(thread); ++ } + -+ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; + -+ Register first = tmp3; ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } + -+ if (needle_con_cnt == -1) { -+ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; ++ sender_pc = (address)this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) 
+ -+ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); -+ bltz(t0, DOSHORT); ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } + -+ (this->*needle_load_1chr)(first, Address(needle), noreg); -+ slli(t0, needle_len, needle_chr_shift); -+ add(needle, needle, t0); -+ neg(nlen_neg, t0); -+ slli(t0, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, t0); -+ neg(hlen_neg, t0); ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? ++ if ((address)sender_sp >= thread->stack_base()) { ++ return false; ++ } + -+ bind(FIRST_LOOP); -+ add(t0, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); -+ beq(first, ch2, STR1_LOOP); ++ sender_unextended_sp = sender_sp; ++ sender_pc = (address) *(sender_sp - 1); ++ saved_fp = (intptr_t*) *(sender_sp - 2); ++ } + -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); + -+ bind(STR1_LOOP); -+ add(nlen_tmp, nlen_neg, needle_chr_size); -+ add(hlen_tmp, hlen_neg, haystack_chr_size); -+ bgez(nlen_tmp, MATCH); ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { + -+ bind(STR1_NEXT); -+ add(ch1, needle, nlen_tmp); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ add(nlen_tmp, nlen_tmp, needle_chr_size); -+ add(hlen_tmp, hlen_tmp, haystack_chr_size); -+ bltz(nlen_tmp, STR1_NEXT); -+ j(MATCH); ++ // fp is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp ++ // is really a frame pointer. 
+ -+ bind(DOSHORT); -+ if (needle_isL == haystack_isL) { -+ sub(t0, needle_len, 2); -+ bltz(t0, DO1); -+ bgtz(t0, DO3); ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); + } -+ } + -+ if (needle_con_cnt == 4) { -+ Label CH1_LOOP; -+ (this->*load_4chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, haystack_len, 4); -+ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } + -+ bind(CH1_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_4chr)(ch2, Address(ch2), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ } ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } + -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { -+ Label CH1_LOOP; -+ BLOCK_COMMENT("string_indexof DO2 {"); -+ bind(DO2); -+ (this->*load_2chr)(ch1, Address(needle), noreg); -+ if (needle_con_cnt == 2) { -+ sub(result_tmp, haystack_len, 2); ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; + } -+ slli(tmp3, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); + -+ bind(CH1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ BLOCK_COMMENT("} string_indexof DO2"); -+ } ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } + -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { -+ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; -+ BLOCK_COMMENT("string_indexof DO3 {"); ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + -+ bind(DO3); -+ (this->*load_2chr)(first, Address(needle), noreg); -+ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); -+ if (needle_con_cnt == 3) { -+ sub(result_tmp, haystack_len, 3); ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); ++ ++ return jcw_safe; + } -+ slli(hlen_tmp, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, hlen_tmp); -+ neg(hlen_neg, hlen_tmp); + -+ bind(FIRST_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(ch2), noreg); -+ beq(first, ch2, STR1_LOOP); ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || 
nm->is_deopt_entry(sender_pc) ||
++ nm->method()->is_method_handle_intrinsic()) {
++ return false;
++ }
++ }
+
++ // If the frame size is 0 (or less), something is bad because every nmethod has a non-zero frame size
++ // because the return address counts against the callee's frame.
++ if (sender_blob->frame_size() <= 0) {
++ assert(!sender_blob->is_compiled(), "should count return address at least");
++ return false;
++ }
+
++ // We should never be able to see anything here except an nmethod. If something in the
++ // code cache (current frame) is called by an entity within the code cache that entity
++ // should not be anything but the call stub (already covered), the interpreter (already covered)
++ // or an nmethod.
++ if (!sender_blob->is_compiled()) {
++ return false;
++ }
++
++ // Could put some more validation for the potential non-interpreted sender
++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte...
++
++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb
++
++ // We've validated the potential sender that would be created
++ return true;
+ }
+
++ // Must be native-compiled frame. Since sender will try and use fp to find
++ // linkages it must be safe
++ if (!fp_safe) {
++ return false;
++ }
+
++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame)
++ if ((address)this->fp()[return_addr_offset] == NULL) { return false; }
+
++ return true;
++}
++
++void frame::patch_pc(Thread* thread, address pc) {
++ address* pc_addr = &(((address*) sp())[-1]);
++ if (TracePcPatching) {
++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]",
++ p2i(pc_addr), p2i(*pc_addr), p2i(pc));
++ }
++ // Either the return address is the original one or we are going to
++ // patch in the same address that's already there.
++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; + } ++} + -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} + -+ bind(MATCH); -+ srai(t0, hlen_neg, haystack_chr_shift); -+ add(result, result_tmp, t0); ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} + -+ bind(DONE); ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; +} + -+// Compare strings. -+void C2_MacroAssembler::string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -+ Register tmp3, int ae) -+{ -+ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, -+ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, -+ SHORT_LOOP_START, TAIL_CHECK, L; ++// sender_sp ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} + -+ const int STUB_THRESHOLD = 64 + 8; -+ bool isLL = ae == StrIntrinsicNode::LL; -+ bool isLU = ae == StrIntrinsicNode::LU; -+ bool isUL = ae == StrIntrinsicNode::UL; ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} + -+ bool str1_isL = isLL || isLU; -+ bool str2_isL = isLL || isUL; + -+ // for L strings, 1 byte for 1 character -+ // for U strings, 2 bytes for 1 character -+ int str1_chr_size = str1_isL ? 1 : 2; -+ int str2_chr_size = str2_isL ? 1 : 2; -+ int minCharsInWord = isLL ? wordSize : wordSize / 2; ++// monitor elements + -+ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} + -+ BLOCK_COMMENT("string_compare {"); ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer"); ++ assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer"); ++ return result; ++} + -+ // Bizzarely, the counts are passed in bytes, regardless of whether they -+ // are L or U strings, however the result is always in characters. 
-+ if (!str1_isL) { -+ sraiw(cnt1, cnt1, 1); -+ } -+ if (!str2_isL) { -+ sraiw(cnt2, cnt2, 1); ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* last_sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = last_sp; ++} ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ // Since we are walking the stack now this nested anchor is obviously walkable ++ // even if it wasn't when it was stacked. ++ if (!jfa->walkable()) { ++ // Capture _last_Java_pc (if needed) and mark anchor walkable. ++ jfa->capture_last_Java_pc(); + } ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ vmassert(jfa->last_Java_pc() != NULL, "not walkable"); ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++} + -+ // Compute the minimum of the string lengths and save the difference in result. -+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; + -+ // A very short string -+ li(t0, minCharsInWord); -+ ble(cnt2, t0, SHORT_STRING); ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; + -+ // Compare longwords -+ // load first parts of strings and finish initialization while loading -+ { -+ if (str1_isL == str2_isL) { // LL or UU -+ // load 8 bytes once to compare -+ ld(tmp1, Address(str1)); -+ beq(str1, str2, DONE); -+ ld(tmp2, Address(str2)); -+ li(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ sub(cnt2, cnt2, minCharsInWord); -+ beqz(cnt2, TAIL_CHECK); -+ // convert cnt2 from characters to bytes -+ if (!str1_isL) { -+ slli(cnt2, cnt2, 1); ++ assert_cond(nm != NULL); ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains_inclusive(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On riscv, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. 
++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); + } -+ add(str2, str2, cnt2); -+ add(str1, str1, cnt2); -+ sub(cnt2, zr, cnt2); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ li(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ add(str1, str1, cnt2); -+ sub(cnt1, zr, cnt2); -+ slli(cnt2, cnt2, 1); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 4); -+ } else { // UL case -+ ld(tmp1, Address(str1)); -+ lwu(tmp2, Address(str2)); -+ li(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ slli(t0, cnt2, 1); -+ sub(cnt1, zr, t0); -+ add(str1, str1, t0); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 8); -+ } -+ addi(cnt2, cnt2, isUL ? 4 : 8); -+ bgez(cnt2, TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); -+ -+ // main loop -+ bind(NEXT_WORD); -+ if (str1_isL == str2_isL) { // LL or UU -+ add(t0, str1, cnt2); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt2, cnt2, 8); -+ } else if (isLU) { // LU case -+ add(t0, str1, cnt1); -+ lwu(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt1, cnt1, 4); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ addi(cnt2, cnt2, 8); -+ } else { // UL case -+ add(t0, str2, cnt2); -+ lwu(tmp2, Address(t0)); -+ add(t0, str1, cnt1); -+ ld(tmp1, Address(t0)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ addi(cnt1, cnt1, 8); -+ addi(cnt2, cnt2, 4); -+ } -+ bgez(cnt2, TAIL); -+ -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, NEXT_WORD); -+ j(DIFFERENCE); -+ bind(TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ if (str1_isL == str2_isL) { // LL or UU -+ ld(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ } else { // UL case -+ lwu(tmp2, Address(str2)); -+ ld(tmp1, Address(str1)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ } -+ bind(TAIL_CHECK); -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, DONE); -+ -+ // Find the first different characters in the longwords and -+ // compute their difference. 
-+ bind(DIFFERENCE); -+ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb -+ srl(tmp1, tmp1, result); -+ srl(tmp2, tmp2, result); -+ if (isLL) { -+ andi(tmp1, tmp1, 0xFF); -+ andi(tmp2, tmp2, 0xFF); -+ } else { -+ andi(tmp1, tmp1, 0xFFFF); -+ andi(tmp2, tmp2, 0xFFFF); + } -+ sub(result, tmp1, tmp2); -+ j(DONE); -+ } -+ -+ bind(STUB); -+ RuntimeAddress stub = NULL; -+ switch (ae) { -+ case StrIntrinsicNode::LL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); -+ break; -+ case StrIntrinsicNode::UU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); -+ break; -+ case StrIntrinsicNode::LU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); -+ break; -+ case StrIntrinsicNode::UL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); -+ break; -+ default: -+ ShouldNotReachHere(); + } -+ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); -+ trampoline_call(stub); -+ j(DONE); -+ -+ bind(SHORT_STRING); -+ // Is the minimum length zero? -+ beqz(cnt2, DONE); -+ // arrange code to do most branches while loading and loading next characters -+ // while comparing previous -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ j(SHORT_LOOP_START); -+ bind(SHORT_LOOP); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST); -+ bind(SHORT_LOOP_START); -+ (this->*str1_load_chr)(tmp2, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(t0, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bne(tmp1, cnt1, SHORT_LOOP_TAIL); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST2); -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ beq(tmp2, t0, SHORT_LOOP); -+ sub(result, tmp2, t0); -+ j(DONE); -+ bind(SHORT_LOOP_TAIL); -+ sub(result, tmp1, cnt1); -+ j(DONE); -+ bind(SHORT_LAST2); -+ beq(tmp2, t0, DONE); -+ sub(result, tmp2, t0); -+ -+ j(DONE); -+ bind(SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bind(SHORT_LAST); -+ beq(tmp1, cnt1, DONE); -+ sub(result, tmp1, cnt1); ++} + -+ bind(DONE); ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. + -+ BLOCK_COMMENT("} string_compare"); ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ assert(map != NULL, "map must be set"); ++ map->set_location(::fp->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. 
++ map->set_location(::fp->as_VMReg()->next(), (address) link_addr); +} + -+void C2_MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, -+ Register tmp4, Register tmp5, Register tmp6, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; // cnt2 only used in array length compare -+ Register elem_per_word = tmp6; -+ int log_elem_size = exact_log2(elem_size); -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); + -+ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); -+ li(elem_per_word, wordSize / elem_size); ++//------------------------------------------------------------------------------ ++// frame::sender_for_interpreter_frame ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // SP is the raw SP from the sender after adapter or interpreter ++ // extension. ++ intptr_t* sender_sp = this->sender_sp(); + -+ BLOCK_COMMENT("arrays_equals {"); ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); + -+ // if (a1 == a2), return true -+ beq(a1, a2, SAME); ++#ifdef COMPILER2 ++ assert(map != NULL, "map must be set"); ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ } ++#endif // COMPILER2 + -+ mv(result, false); -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt2, cnt1, DONE); -+ beqz(cnt1, SAME); ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} + -+ slli(tmp5, cnt1, 3 + log_elem_size); -+ sub(tmp5, zr, tmp5); -+ add(a1, a1, base_offset); -+ add(a2, a2, base_offset); -+ ld(tmp3, Address(a1, 0)); -+ ld(tmp4, Address(a2, 0)); -+ ble(cnt1, elem_per_word, SHORT); // short or same + -+ // Main 16 byte comparison loop with 2 exits -+ bind(NEXT_DWORD); { -+ ld(tmp1, Address(a1, wordSize)); -+ ld(tmp2, Address(a2, wordSize)); -+ sub(cnt1, cnt1, 2 * wordSize / elem_size); -+ blez(cnt1, TAIL); -+ bne(tmp3, tmp4, DONE); -+ ld(tmp3, Address(a1, 2 * wordSize)); -+ ld(tmp4, Address(a2, 2 * wordSize)); -+ add(a1, a1, 2 * wordSize); -+ add(a2, a2, 2 * wordSize); -+ ble(cnt1, elem_per_word, TAIL2); -+ } beq(tmp1, tmp2, NEXT_DWORD); -+ j(DONE); ++//------------------------------------------------------------------------------ ++// frame::sender_for_compiled_frame ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ // we cannot rely upon the last fp having been saved to the thread ++ // in C2 code but it will have been pushed onto the stack. 
So we
++ // have to find it relative to the unextended sp
+
++ assert(_cb->frame_size() >= 0, "must have non-zero frame size");
++ intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size();
++ intptr_t* unextended_sp = l_sender_sp;
+
++ // the return_address is always the word on the stack
++ address sender_pc = (address) *(l_sender_sp + frame::return_addr_offset);
+
++ intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp + frame::link_offset);
+
++ assert(map != NULL, "map must be set");
++ if (map->update_map()) {
++ // Tell GC to use argument oopmaps for some runtime stubs that need it.
++ // For C1, the runtime stub might not have oop maps, so set this flag
++ // outside of update_register_map.
++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
++ if (_cb->oop_maps() != NULL) {
++ OopMapSet::update_register_map(this, map);
++ }
+
++ // Since the prolog does the save and restore of FP there is no
++ // oopmap for it so we must fill in its location as if there was
++ // an oopmap entry since if our caller was compiled code there
++ // could be live jvm state in it.
++ update_map_with_saved_link(map, saved_fp_addr);
++ }
+
++ return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc);
+}
+
++//------------------------------------------------------------------------------
++// frame::sender
++frame frame::sender(RegisterMap* map) const {
++ // Default is we don't have to follow them. The sender_for_xxx will
++ // update it accordingly
++ assert(map != NULL, "map must be set");
++ map->set_include_argument_oops(false);
+
++ if (is_entry_frame()) {
++ return sender_for_entry_frame(map);
++ }
++ if (is_interpreted_frame()) {
++ return sender_for_interpreter_frame(map);
++ }
++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
+
++ // This test looks odd: why is it not is_compiled_frame() ? That's
++ // because stubs also have OOP maps.
++ if (_cb != NULL) {
++ return sender_for_compiled_frame(map);
++ }
+
++ // Must be native-compiled frame, i.e. the marshaling code for native
++ // methods that exists in the core system.
++ return frame(sender_sp(), link(), sender_pc()); ++} + -+ BLOCK_COMMENT("string_equals {"); ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == NULL || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == NULL || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } + -+ mv(result, false); ++ // do some validation of frame elements + -+ // Check for short strings, i.e. smaller than wordSize. -+ sub(cnt1, cnt1, wordSize); -+ bltz(cnt1, SHORT); ++ // first the method ++ Method* m = *interpreter_frame_method_addr(); ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) { ++ return false; ++ } + -+ // Main 8 byte comparison loop. -+ bind(NEXT_WORD); { -+ ld(tmp1, Address(a1, 0)); -+ add(a1, a1, wordSize); -+ ld(tmp2, Address(a2, 0)); -+ add(a2, a2, wordSize); -+ sub(cnt1, cnt1, wordSize); -+ bne(tmp1, tmp2, DONE); -+ } bgtz(cnt1, NEXT_WORD); ++ // stack frames shouldn't be much larger than max_stack elements ++ // this test requires the use of unextended_sp which is the sp as seen by ++ // the current frame, and not sp which is the "raw" pc which could point ++ // further because of local variables of the callee method inserted after ++ // method arguments ++ if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { ++ return false; ++ } + -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when -+ // length == 4. -+ add(tmp1, a1, cnt1); -+ ld(tmp1, Address(tmp1, 0)); -+ add(tmp2, a2, cnt1); -+ ld(tmp2, Address(tmp2, 0)); -+ bne(tmp1, tmp2, DONE); -+ j(SAME); ++ // validate bci/bcx ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } + -+ bind(SHORT); -+ Label TAIL03, TAIL01; ++ // validate constantPoolCache* ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ if (MetaspaceObj::is_valid(cp) == false) { ++ return false; ++ } + -+ // 0-7 bytes left. -+ andi(t0, cnt1, 4); -+ beqz(t0, TAIL03); -+ { -+ lwu(tmp1, Address(a1, 0)); -+ add(a1, a1, 4); -+ lwu(tmp2, Address(a2, 0)); -+ add(a2, a2, 4); -+ bne(tmp1, tmp2, DONE); ++ // validate locals ++ address locals = (address) *interpreter_frame_locals_addr(); ++ if (locals > thread->stack_base()) { ++ return false; + } + -+ bind(TAIL03); -+ // 0-3 bytes left. -+ andi(t0, cnt1, 2); -+ beqz(t0, TAIL01); -+ { -+ lhu(tmp1, Address(a1, 0)); -+ add(a1, a1, 2); -+ lhu(tmp2, Address(a2, 0)); -+ add(a2, a2, 2); -+ bne(tmp1, tmp2, DONE); ++ if (m->max_locals() > 0 && locals < (address) fp()) { ++ // fp in interpreter frame on RISC-V is higher than that on AArch64, ++ // pointing to sender_sp and sender_sp-2 relatively. ++ // On RISC-V, if max_locals is 0, the 'locals' pointer may be below fp, ++ // pointing to sender_sp-1 (with one padding slot). ++ // So we verify the 'locals' pointer only if max_locals > 0. 
++ return false;
++ }
+
++ // We'd have to be pretty unlucky to be misled at this point
++ return true;
++}
++
++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) {
++ assert(is_interpreted_frame(), "interpreted frame expected");
++ Method* method = interpreter_frame_method();
++ BasicType type = method->result_type();
++
++ intptr_t* tos_addr = NULL;
++ if (method->is_native()) {
++ tos_addr = (intptr_t*)sp();
++ if (type == T_FLOAT || type == T_DOUBLE) {
++ // This is because we do a push(ltos) after push(dtos) in generate_native_entry.
++ tos_addr += 2 * Interpreter::stackElementWords;
++ }
++ } else {
++ tos_addr = (intptr_t*)interpreter_frame_tos_address();
++ }
++
++ switch (type) {
++ case T_OBJECT :
++ case T_ARRAY : {
++ oop obj;
++ if (method->is_native()) {
++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset));
++ } else {
++ oop* obj_p = (oop*)tos_addr;
++ obj = (obj_p == NULL) ? (oop)NULL : *obj_p;
++ }
++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check");
++ *oop_result = obj;
++ break;
++ }
++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break;
++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break;
++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break;
++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break;
++ case T_INT : value_result->i = *(jint*)tos_addr; break;
++ case T_LONG : value_result->j = *(jlong*)tos_addr; break;
++ case T_FLOAT : {
++ value_result->f = *(jfloat*)tos_addr;
++ break;
++ }
++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break;
++ case T_VOID : /* Nothing to do */ break;
++ default : ShouldNotReachHere();
++ }
+
-+ bind(DONE); -+ BLOCK_COMMENT("} string_equals"); ++ return type; +} + -+typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); -+typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, -+ bool is_far, bool is_unordered); + -+static conditional_branch_insn conditional_branches[] = -+{ -+ /* SHORT branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgt, -+ NULL, // BoolTest::overflow -+ (conditional_branch_insn)&Assembler::blt, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::ble, -+ NULL, // BoolTest::no_overflow -+ (conditional_branch_insn)&Assembler::bge, ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} + -+ /* UNSIGNED branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgtu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bltu, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::bleu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bgeu -+}; ++#ifndef PRODUCT + -+static float_conditional_branch_insn float_conditional_branches[] = -+{ -+ /* FLOAT SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::float_beq, -+ (float_conditional_branch_insn)&MacroAssembler::float_bgt, -+ NULL, // BoolTest::overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_blt, -+ (float_conditional_branch_insn)&MacroAssembler::float_bne, -+ (float_conditional_branch_insn)&MacroAssembler::float_ble, -+ NULL, // BoolTest::no_overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_bge, -+ -+ /* DOUBLE SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::double_beq, -+ (float_conditional_branch_insn)&MacroAssembler::double_bgt, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_blt, -+ (float_conditional_branch_insn)&MacroAssembler::double_bne, -+ (float_conditional_branch_insn)&MacroAssembler::double_ble, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_bge -+}; -+ -+void C2_MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), -+ "invalid conditional branch index"); -+ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); -+} -+ -+// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use -+// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). -+void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), -+ "invalid float conditional branch index"); -+ int booltest_flag = cmpFlag & ~(C2_MacroAssembler::double_branch_mask); -+ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, -+ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? 
false : true); -+} -+ -+void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ case BoolTest::le: -+ beqz(op1, L, is_far); -+ break; -+ case BoolTest::ne: -+ case BoolTest::gt: -+ bnez(op1, L, is_far); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ beqz(op1, L, is_far); -+ break; -+ case BoolTest::ne: -+ bnez(op1, L, is_far); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { -+ Label L; -+ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); -+ mv(dst, src); -+ bind(L); -+} -+ -+// Set dst to NaN if any NaN input. -+void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, -+ bool is_double, bool is_min) { -+ assert_different_registers(dst, src1, src2); -+ -+ Label Done; -+ fsflags(zr); -+ if (is_double) { -+ is_min ? fmin_d(dst, src1, src2) -+ : fmax_d(dst, src1, src2); -+ // Checking NaNs -+ flt_d(zr, src1, src2); -+ } else { -+ is_min ? fmin_s(dst, src1, src2) -+ : fmax_s(dst, src1, src2); -+ // Checking NaNs -+ flt_s(zr, src1, src2); -+ } -+ -+ frflags(t0); -+ beqz(t0, Done); -+ -+ // In case of NaNs -+ is_double ? fadd_d(dst, src1, src2) -+ : fadd_s(dst, src1, src2); -+ -+ bind(Done); -+} -+ -+void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, -+ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { -+ Label loop; -+ Assembler::SEW sew = islatin ? Assembler::e8 : Assembler::e16; -+ -+ bind(loop); -+ vsetvli(tmp1, cnt, sew, Assembler::m2); -+ vlex_v(vr1, a1, sew); -+ vlex_v(vr2, a2, sew); -+ vmsne_vv(vrs, vr1, vr2); -+ vfirst_m(tmp2, vrs); -+ bgez(tmp2, DONE); -+ sub(cnt, cnt, tmp1); -+ if (!islatin) { -+ slli(tmp1, tmp1, 1); // get byte counts -+ } -+ add(a1, a1, tmp1); -+ add(a2, a2, tmp1); -+ bnez(cnt, loop); -+ -+ mv(result, true); -+} -+ -+void C2_MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { -+ Label DONE; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ -+ BLOCK_COMMENT("string_equals_v {"); -+ -+ mv(result, false); ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) + -+ if (elem_size == 2) { -+ srli(cnt, cnt, 1); ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); + } -+ -+ element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); -+ -+ bind(DONE); -+ BLOCK_COMMENT("} string_equals_v"); -+} -+ -+// used by C2 ClearArray patterns. -+// base: Address of a buffer to be zeroed -+// cnt: Count in HeapWords -+// -+// base, cnt, v0, v1 and t0 are clobbered. 
-+void C2_MacroAssembler::clear_array_v(Register base, Register cnt) { -+ Label loop; -+ -+ // making zero words -+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -+ vxor_vv(v0, v0, v0); -+ -+ bind(loop); -+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -+ vse64_v(v0, base); -+ sub(cnt, cnt, t0); -+ shadd(base, t0, base, t0, 3); -+ bnez(cnt, loop); +} ++#endif + -+void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); -+ -+ BLOCK_COMMENT("arrays_equals_v {"); -+ -+ // if (a1 == a2), return true -+ mv(result, true); -+ beq(a1, a2, DONE); -+ -+ mv(result, false); -+ // if a1 == null or a2 == null, return false -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ // if (a1.length != a2.length), return false -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt1, cnt2, DONE); -+ -+ la(a1, Address(a1, base_offset)); -+ la(a2, Address(a2, base_offset)); -+ -+ element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); -+ -+ bind(DONE); -+ -+ BLOCK_COMMENT("} arrays_equals_v"); ++intptr_t *frame::initial_deoptimization_info() { ++ // Not used on riscv, but we must return something. ++ return NULL; +} + -+void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, -+ Register result, Register tmp1, Register tmp2, int encForm) { -+ Label DIFFERENCE, DONE, L, loop; -+ bool encLL = encForm == StrIntrinsicNode::LL; -+ bool encLU = encForm == StrIntrinsicNode::LU; -+ bool encUL = encForm == StrIntrinsicNode::UL; -+ -+ bool str1_isL = encLL || encLU; -+ bool str2_isL = encLL || encUL; -+ -+ int minCharsInWord = encLL ? wordSize : wordSize / 2; -+ -+ BLOCK_COMMENT("string_compare {"); -+ -+ // for Lating strings, 1 byte for 1 character -+ // for UTF16 strings, 2 bytes for 1 character -+ if (!str1_isL) -+ sraiw(cnt1, cnt1, 1); -+ if (!str2_isL) -+ sraiw(cnt2, cnt2, 1); -+ -+ // if str1 == str2, return the difference -+ // save the minimum of the string lengths in cnt2. -+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); -+ -+ if (str1_isL == str2_isL) { // LL or UU -+ element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); -+ j(DONE); -+ } else { // LU or UL -+ Register strL = encLU ? str1 : str2; -+ Register strU = encLU ? str2 : str1; -+ VectorRegister vstr1 = encLU ? v4 : v0; -+ VectorRegister vstr2 = encLU ? v0 : v4; -+ -+ bind(loop); -+ vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); -+ vle8_v(vstr1, strL); -+ vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); -+ vzext_vf2(vstr2, vstr1); -+ vle16_v(vstr1, strU); -+ vmsne_vv(v0, vstr2, vstr1); -+ vfirst_m(tmp2, v0); -+ bgez(tmp2, DIFFERENCE); -+ sub(cnt2, cnt2, tmp1); -+ add(strL, strL, tmp1); -+ shadd(strU, tmp1, strU, tmp1, 1); -+ bnez(cnt2, loop); -+ j(DONE); ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } + } -+ bind(DIFFERENCE); -+ slli(tmp1, tmp2, 1); -+ add(str1, str1, str1_isL ? tmp2 : tmp1); -+ add(str2, str2, str2_isL ? tmp2 : tmp1); -+ str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); -+ str2_isL ? 
lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); -+ sub(result, tmp1, tmp2); -+ -+ bind(DONE); -+} -+ -+void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { -+ Label loop; -+ assert_different_registers(src, dst, len, tmp, t0); -+ -+ BLOCK_COMMENT("byte_array_inflate_v {"); -+ bind(loop); -+ vsetvli(tmp, len, Assembler::e8, Assembler::m2); -+ vle8_v(v2, src); -+ vsetvli(t0, len, Assembler::e16, Assembler::m4); -+ vzext_vf2(v0, v2); -+ vse16_v(v0, dst); -+ sub(len, len, tmp); -+ add(src, src, tmp); -+ shadd(dst, tmp, dst, tmp, 1); -+ bnez(len, loop); -+ BLOCK_COMMENT("} byte_array_inflate_v"); -+} -+ -+// Compress char[] array to byte[]. -+// result: the array length if every element in array can be encoded; 0, otherwise. -+void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { -+ Label done; -+ encode_iso_array_v(src, dst, len, result, tmp); -+ beqz(len, done); -+ mv(result, zr); -+ bind(done); -+} -+ -+// result: the number of elements had been encoded. -+void C2_MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { -+ Label loop, DIFFERENCE, DONE; -+ -+ BLOCK_COMMENT("encode_iso_array_v {"); -+ mv(result, 0); -+ -+ bind(loop); -+ mv(tmp, 0xff); -+ vsetvli(t0, len, Assembler::e16, Assembler::m2); -+ vle16_v(v2, src); -+ // if element > 0xff, stop -+ vmsgtu_vx(v1, v2, tmp); -+ vfirst_m(tmp, v1); -+ vmsbf_m(v0, v1); -+ // compress char to byte -+ vsetvli(t0, len, Assembler::e8); -+ vncvt_x_x_w(v1, v2, Assembler::v0_t); -+ vse8_v(v1, dst, Assembler::v0_t); -+ -+ bgez(tmp, DIFFERENCE); -+ add(result, result, t0); -+ add(dst, dst, t0); -+ sub(len, len, t0); -+ shadd(src, t0, src, t0, 1); -+ bnez(len, loop); -+ j(DONE); -+ -+ bind(DIFFERENCE); -+ add(result, result, tmp); -+ -+ bind(DONE); -+ BLOCK_COMMENT("} encode_iso_array_v"); -+} -+ -+void C2_MacroAssembler::count_positives_v(Register ary, Register len, Register result, Register tmp) { -+ Label LOOP, SET_RESULT, DONE; -+ -+ BLOCK_COMMENT("count_positives_v {"); -+ mv(result, zr); -+ -+ bind(LOOP); -+ vsetvli(t0, len, Assembler::e8, Assembler::m4); -+ vle8_v(v0, ary); -+ vmslt_vx(v0, v0, zr); -+ vfirst_m(tmp, v0); -+ bgez(tmp, SET_RESULT); -+ // if tmp == -1, all bytes are positive -+ add(result, result, t0); -+ -+ sub(len, len, t0); -+ add(ary, ary, t0); -+ bnez(len, LOOP); -+ j(DONE); -+ -+ // add remaining positive bytes count -+ bind(SET_RESULT); -+ add(result, result, tmp); -+ -+ bind(DONE); -+ BLOCK_COMMENT("} count_positives_v"); ++ // else rely on fp() ++ assert(!is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); +} + -+void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ bool isL) { -+ mv(result, zr); -+ -+ Label loop, MATCH, DONE; -+ Assembler::SEW sew = isL ? Assembler::e8 : Assembler::e16; -+ bind(loop); -+ vsetvli(tmp1, cnt1, sew, Assembler::m4); -+ vlex_v(v0, str1, sew); -+ vmseq_vx(v0, v0, ch); -+ vfirst_m(tmp2, v0); -+ bgez(tmp2, MATCH); // if equal, return index -+ -+ add(result, result, tmp1); -+ sub(cnt1, cnt1, tmp1); -+ if (!isL) slli(tmp1, tmp1, 1); -+ add(str1, str1, tmp1); -+ bnez(cnt1, loop); -+ -+ mv(result, -1); -+ j(DONE); -+ -+ bind(MATCH); -+ add(result, result, tmp2); ++#undef DESCRIBE_FP_OFFSET + -+ bind(DONE); ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. 
++frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { ++ init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); +} + -+// Set dst to NaN if any NaN input. -+void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, -+ bool is_double, bool is_min) { -+ assert_different_registers(dst, src1, src2); -+ -+ vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); -+ -+ is_min ? vfmin_vv(dst, src1, src2) -+ : vfmax_vv(dst, src1, src2); ++void frame::pd_ps() {} ++#endif + -+ vmfne_vv(v0, src1, src1); -+ vfadd_vv(dst, src1, src1, Assembler::v0_t); -+ vmfne_vv(v0, src2, src2); -+ vfadd_vv(dst, src2, src2, Assembler::v0_t); ++void JavaFrameAnchor::make_walkable(JavaThread* thread) { ++ // last frame set? ++ if (last_Java_sp() == NULL) { return; } ++ // already walkable? ++ if (walkable()) { return; } ++ vmassert(Thread::current() == (Thread*)thread, "not current thread"); ++ vmassert(last_Java_sp() != NULL, "not called from Java code?"); ++ vmassert(last_Java_pc() == NULL, "already walkable"); ++ capture_last_Java_pc(); ++ vmassert(walkable(), "something went wrong"); +} + -+// Set dst to NaN if any NaN input. -+void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst, -+ FloatRegister src1, VectorRegister src2, -+ VectorRegister tmp1, VectorRegister tmp2, -+ bool is_double, bool is_min) { -+ assert_different_registers(src2, tmp1, tmp2); -+ -+ Label L_done, L_NaN; -+ vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); -+ vfmv_s_f(tmp2, src1); -+ -+ is_min ? vfredmin_vs(tmp1, src2, tmp2) -+ : vfredmax_vs(tmp1, src2, tmp2); -+ -+ fsflags(zr); -+ // Checking NaNs -+ vmflt_vf(tmp2, src2, src1); -+ frflags(t0); -+ bnez(t0, L_NaN); -+ j(L_done); -+ -+ bind(L_NaN); -+ vfmv_s_f(tmp2, src1); -+ vfredsum_vs(tmp1, src2, tmp2); -+ -+ bind(L_done); -+ vfmv_f_s(dst, tmp1); ++void JavaFrameAnchor::capture_last_Java_pc() { ++ vmassert(_last_Java_sp != NULL, "no last frame set"); ++ vmassert(_last_Java_pc == NULL, "already walkable"); ++ _last_Java_pc = (address)_last_Java_sp[-1]; +} -diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp new file mode 100644 -index 00000000000..c71df4c101b +index 0000000000..18e021dcb9 --- /dev/null -+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp -@@ -0,0 +1,193 @@ ++++ b/src/hotspot/cpu/riscv/frame_riscv.hpp +@@ -0,0 +1,199 @@ +/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ *
@@ -13371,182 +12650,189 @@ index 00000000000..c71df4c101b
+ *
+ */
+
-+#ifndef CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP
-+#define CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP
-+
-+// C2_MacroAssembler contains high-level macros for C2
-+
-+ private:
-+ void element_compare(Register r1, Register r2,
-+ Register result, Register cnt,
-+ Register tmp1, Register tmp2,
-+ VectorRegister vr1, VectorRegister vr2,
-+ VectorRegister vrs,
-+ bool is_latin, Label& DONE);
-+ public:
++#ifndef CPU_RISCV_FRAME_RISCV_HPP
++#define CPU_RISCV_FRAME_RISCV_HPP
+
-+ void string_compare(Register str1, Register str2,
-+ Register cnt1, Register cnt2, Register result,
-+ Register tmp1, Register tmp2, Register tmp3,
-+ int ae);
++#include "runtime/synchronizer.hpp"
+
-+ void string_indexof_char_short(Register str1, Register cnt1,
-+ Register ch, Register result,
-+ bool isL);
++// A frame represents a physical stack frame (an activation). Frames can be
++// C or Java frames, and the Java frames can be interpreted or compiled.
++// In contrast, vframes represent source-level activations, so that one physical frame
++// can correspond to multiple source level frames because of inlining.
++// A frame is comprised of {pc, fp, sp}
++// ------------------------------ Asm interpreter ----------------------------------------
++// Layout of asm interpreter frame:
++// [expression stack ] * <- sp
+
-+ void string_indexof_char(Register str1, Register cnt1,
-+ Register ch, Register result,
-+ Register tmp1, Register tmp2,
-+ Register tmp3, Register tmp4,
-+ bool isL);
++// [monitors[0] ] \
++// ... | monitor block size = k
++// [monitors[k-1] ] /
++// [frame initial esp ] ( == &monitors[0], initially here) initial_sp_offset
++// [byte code index/pointer] = bcx() bcx_offset
+
-+ void string_indexof(Register str1, Register str2,
-+ Register cnt1, Register cnt2,
-+ Register tmp1, Register tmp2,
-+ Register tmp3, Register tmp4,
-+ Register tmp5, Register tmp6,
-+ Register result, int ae);
++// [pointer to locals ] = locals() locals_offset
++// [constant pool cache ] = cache() cache_offset
+
-+ void string_indexof_linearscan(Register haystack, Register needle,
-+ Register haystack_len, Register needle_len,
-+ Register tmp1, Register tmp2,
-+ Register tmp3, Register tmp4,
-+ int needle_con_cnt, Register result, int ae);
++// [klass of method ] = mirror() mirror_offset
++// [padding ]
+
-+ void arrays_equals(Register r1, Register r2,
-+ Register tmp3, Register tmp4,
-+ Register tmp5, Register tmp6,
-+ Register result, Register cnt1,
-+ int elem_size);
++// [methodData ] = mdp() mdx_offset
++// [Method ] = method() method_offset
+
-+ void string_equals(Register r1, Register r2,
-+ Register result, Register cnt1,
-+ int elem_size);
++// [last esp ] = last_sp() last_sp_offset
++// [old stack pointer ] (sender_sp) sender_sp_offset
+
-+ // refer to conditional_branches and float_conditional_branches
-+ static const int bool_test_bits = 3;
-+ static const int neg_cond_bits = 2;
-+ static const int unsigned_branch_mask = 1 << bool_test_bits;
-+ static const int double_branch_mask = 1 << bool_test_bits;
++// [old frame pointer ]
++// [return pc ]
+
-+ // cmp
-+ void cmp_branch(int cmpFlag,
-+ Register op1, Register op2,
-+ Label& label, bool is_far = false);
++// [last sp ] <- fp = link()
++// [oop temp ] (only for native calls)
+
-+ void float_cmp_branch(int cmpFlag,
-+ FloatRegister op1, FloatRegister op2,
-+ Label& label, bool is_far = false);
++// [padding ] (to preserve machine SP alignment)
++// [locals and parameters ]
++// <- sender sp
++// ------------------------------ Asm interpreter ---------------------------------------- + -+ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, -+ Label& L, bool is_far = false); ++// ------------------------------ C Frame ------------------------------------------------ ++// Stack: gcc with -fno-omit-frame-pointer ++// . ++// . ++// +-> . ++// | +-----------------+ | ++// | | return address | | ++// | | previous fp ------+ ++// | | saved registers | ++// | | local variables | ++// | | ... | <-+ ++// | +-----------------+ | ++// | | return address | | ++// +------ previous fp | | ++// | saved registers | | ++// | local variables | | ++// +-> | ... | | ++// | +-----------------+ | ++// | | return address | | ++// | | previous fp ------+ ++// | | saved registers | ++// | | local variables | ++// | | ... | <-+ ++// | +-----------------+ | ++// | | return address | | ++// +------ previous fp | | ++// | saved registers | | ++// | local variables | | ++// $fp --> | ... | | ++// +-----------------+ | ++// | return address | | ++// | previous fp ------+ ++// | saved registers | ++// $sp --> | local variables | ++// +-----------------+ ++// ------------------------------ C Frame ------------------------------------------------ + -+ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, -+ Label& L, bool is_far = false); ++ public: ++ enum { ++ pc_return_offset = 0, ++ // All frames ++ link_offset = -2, ++ return_addr_offset = -1, ++ sender_sp_offset = 0, ++ // Interpreter frames ++ interpreter_frame_oop_temp_offset = 1, // for native calls only + -+ void enc_cmove(int cmpFlag, -+ Register op1, Register op2, -+ Register dst, Register src); ++ interpreter_frame_sender_sp_offset = -3, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, + -+ void spill(Register r, bool is64, int offset) { -+ is64 ? sd(r, Address(sp, offset)) -+ : sw(r, Address(sp, offset)); -+ } ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, + -+ void spill(FloatRegister f, bool is64, int offset) { -+ is64 ? fsd(f, Address(sp, offset)) -+ : fsw(f, Address(sp, offset)); -+ } ++ // Entry frames ++ // n.b. these values are determined by the layout defined in ++ // stubGenerator for the Java call stub ++ entry_frame_after_call_words = 34, ++ entry_frame_call_wrapper_offset = -10, + -+ void spill(VectorRegister v, int offset) { -+ add(t0, sp, offset); -+ vs1r_v(v, t0); -+ } ++ // we don't need a save area ++ arg_reg_save_area_bytes = 0 ++ }; + -+ void unspill(Register r, bool is64, int offset) { -+ is64 ? ld(r, Address(sp, offset)) -+ : lw(r, Address(sp, offset)); ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); + } + -+ void unspillu(Register r, bool is64, int offset) { -+ is64 ? 
ld(r, Address(sp, offset)) -+ : lwu(r, Address(sp, offset)); ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; + } + -+ void unspill(FloatRegister f, bool is64, int offset) { -+ is64 ? fld(f, Address(sp, offset)) -+ : flw(f, Address(sp, offset)); -+ } ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. + -+ void unspill(VectorRegister v, int offset) { -+ add(t0, sp, offset); -+ vl1r_v(v, t0); -+ } ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); + -+ void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vec_reg_size_in_bytes) { -+ assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); -+ unspill(v0, src_offset); -+ spill(v0, dst_offset); ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); + } + -+ void minmax_FD(FloatRegister dst, -+ FloatRegister src1, FloatRegister src2, -+ bool is_double, bool is_min); ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp); ++#endif + -+ // intrinsic methods implemented by rvv instructions -+ void string_equals_v(Register r1, Register r2, -+ Register result, Register cnt1, -+ int elem_size); ++ public: ++ // Constructors + -+ void arrays_equals_v(Register r1, Register r2, -+ Register result, Register cnt1, -+ int elem_size); ++ frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); + -+ void string_compare_v(Register str1, Register str2, -+ Register cnt1, Register cnt2, -+ Register result, -+ Register tmp1, Register tmp2, -+ int encForm); ++ frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc); + -+ void clear_array_v(Register base, Register cnt); ++ frame(intptr_t* ptr_sp, intptr_t* ptr_fp); + -+ void byte_array_inflate_v(Register src, Register dst, -+ Register len, Register tmp); ++ void init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); + -+ void char_array_compress_v(Register src, Register dst, -+ Register len, Register result, -+ Register tmp); ++ // accessors for the instance variables ++ // Note: not necessarily the real 'frame pointer' (see real_fp) ++ intptr_t* fp() const { return _fp; } + -+ void encode_iso_array_v(Register src, Register dst, -+ Register len, Register result, -+ Register tmp); ++ inline address* sender_pc_addr() const; + -+ void count_positives_v(Register ary, Register len, -+ Register result, Register tmp); ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; + -+ void string_indexof_char_v(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ bool isL); ++ // helper to update a map with callee-saved RBP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + -+ void minmax_FD_v(VectorRegister dst, -+ VectorRegister src1, VectorRegister src2, -+ bool is_double, bool is_min); ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* last_sp); + -+ void reduce_minmax_FD_v(FloatRegister dst, -+ 
FloatRegister src1, VectorRegister src2, -+ VectorRegister tmp1, VectorRegister tmp2, -+ bool is_double, bool is_min); ++ static jint interpreter_frame_expression_stack_direction() { return -1; } + -+#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp ++#endif // CPU_RISCV_FRAME_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp new file mode 100644 -index 00000000000..53a41665f4b +index 0000000000..abd5bda7e4 --- /dev/null -+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -0,0 +1,83 @@ ++++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp +@@ -0,0 +1,245 @@ +/* -+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -13570,368 +12856,234 @@ index 00000000000..53a41665f4b + * + */ + -+#ifndef CPU_RISCV_C2_GLOBALS_RISCV_HPP -+#define CPU_RISCV_C2_GLOBALS_RISCV_HPP ++#ifndef CPU_RISCV_FRAME_RISCV_INLINE_HPP ++#define CPU_RISCV_FRAME_RISCV_INLINE_HPP + -+#include "utilities/globalDefinitions.hpp" -+#include "utilities/macros.hpp" ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" + -+// Sets the default values for platform dependent flags used by the server compiler. -+// (see c2_globals.hpp). Alpha-sorted. ++// Inline functions for RISCV frames: + -+define_pd_global(bool, BackgroundCompilation, true); -+define_pd_global(bool, CICompileOSR, true); -+define_pd_global(bool, InlineIntrinsics, true); -+define_pd_global(bool, PreferInterpreterNativeStubs, false); -+define_pd_global(bool, ProfileTraps, true); -+define_pd_global(bool, UseOnStackReplacement, true); -+define_pd_global(bool, ProfileInterpreter, true); -+define_pd_global(bool, TieredCompilation, COMPILER1_PRESENT(true) NOT_COMPILER1(false)); -+define_pd_global(intx, CompileThreshold, 10000); ++// Constructors: + -+define_pd_global(intx, OnStackReplacePercentage, 140); -+define_pd_global(intx, ConditionalMoveLimit, 0); -+define_pd_global(intx, FreqInlineSize, 325); -+define_pd_global(intx, MinJumpTableSize, 10); -+define_pd_global(intx, InteriorEntryAlignment, 16); -+define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); -+define_pd_global(intx, LoopUnrollLimit, 60); -+define_pd_global(intx, LoopPercentProfileLimit, 10); -+// InitialCodeCacheSize derived from specjbb2000 run. -+define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize -+define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} + -+// Ergonomics related flags -+define_pd_global(uint64_t,MaxRAM, 128ULL*G); -+define_pd_global(intx, RegisterCostAreaRatio, 16000); ++static int spin; + -+// Peephole and CISC spilling both break the graph, and so makes the -+// scheduler sick. 
-+define_pd_global(bool, OptoPeephole, false); -+define_pd_global(bool, UseCISCSpill, false); -+define_pd_global(bool, OptoScheduling, true); -+define_pd_global(bool, OptoBundling, false); -+define_pd_global(bool, OptoRegScheduling, false); -+define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); -+define_pd_global(bool, IdealizeClearArrayNode, true); ++inline void frame::init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { ++ intptr_t a = intptr_t(ptr_sp); ++ intptr_t b = intptr_t(ptr_fp); ++ _sp = ptr_sp; ++ _unextended_sp = ptr_sp; ++ _fp = ptr_fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); + -+define_pd_global(intx, ReservedCodeCacheSize, 48*M); -+define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); -+define_pd_global(intx, ProfiledCodeHeapSize, 22*M); -+define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); -+define_pd_global(uintx, CodeCacheMinBlockLength, 6); -+define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} + -+// Ergonomics related flags -+define_pd_global(bool, NeverActAsServerClassMachine, false); ++inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { ++ init(ptr_sp, ptr_fp, pc); ++} + -+define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. ++inline frame::frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc) { ++ intptr_t a = intptr_t(ptr_sp); ++ intptr_t b = intptr_t(ptr_fp); ++ _sp = ptr_sp; ++ _unextended_sp = unextended_sp; ++ _fp = ptr_fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); + -+#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/c2_init_riscv.cpp b/src/hotspot/cpu/riscv/c2_init_riscv.cpp -new file mode 100644 -index 00000000000..cdbd69807be ---- /dev/null -+++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp -@@ -0,0 +1,38 @@ -+/* -+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ *
-+ */
++ address original_pc = CompiledMethod::get_deopt_original_pc(this);
++ if (original_pc != NULL) {
++ _pc = original_pc;
++ assert(_cb->as_compiled_method()->insts_contains_inclusive(_pc),
++ "original PC must be in the main code section of the compiled method (or must be immediately following it)");
++ _deopt_state = is_deoptimized;
++ } else {
++ _deopt_state = not_deoptimized;
++ }
++}
+
-+#include "precompiled.hpp"
-+#include "opto/compile.hpp"
-+#include "opto/node.hpp"
++inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp) {
++ intptr_t a = intptr_t(ptr_sp);
++ intptr_t b = intptr_t(ptr_fp);
++ _sp = ptr_sp;
++ _unextended_sp = ptr_sp;
++ _fp = ptr_fp;
++ _pc = (address)(ptr_sp[-1]);
+
-+// processor dependent initialization for riscv
++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace
++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly
++ // unlucky the junk value could be to a zombied method and we'll die on the
++ // find_blob call. This is also why we can have no asserts on the validity
++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler
++ // -> pd_last_frame should use a specialized version of pd_last_frame which could
++ // call a specialized frame constructor instead of this one.
++ // Then we could use the assert below. However this assert is of somewhat dubious
++ // value.
+
-+extern void reg_mask_init();
++ _cb = CodeCache::find_blob(_pc);
++ adjust_unextended_sp();
+
-+void Compile::pd_compiler2_init() {
-+ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" );
-+ reg_mask_init();
++ address original_pc = CompiledMethod::get_deopt_original_pc(this);
++ if (original_pc != NULL) {
++ _pc = original_pc;
++ _deopt_state = is_deoptimized;
++ } else {
++ _deopt_state = not_deoptimized;
++ }
+}
-diff --git a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp
-new file mode 100644
-index 00000000000..a90d9fdc160
---- /dev/null
-+++ b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp
-@@ -0,0 +1,47 @@
-+/*
-+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
-+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This code is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License version 2 only, as
-+ * published by the Free Software Foundation.
-+ *
-+ * This code is distributed in the hope that it will be useful, but WITHOUT
-+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * version 2 for more details (a copy is included in the LICENSE file that
-+ * accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License version
-+ * 2 along with this work; if not, write to the Free Software Foundation,
-+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-+ *
-+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-+ * or visit www.oracle.com if you need additional information or have any
-+ * questions.
-+ *
-+ */
-+
-+#include "precompiled.hpp"
-+#include "asm/macroAssembler.hpp"
-+#include "opto/compile.hpp"
-+#include "opto/node.hpp"
-+#include "opto/output.hpp"
-+#include "runtime/sharedRuntime.hpp"
-+
-+#define __ masm.
-+void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { -+ assert(SharedRuntime::polling_page_return_handler_blob() != NULL, -+ "polling page return stub not created yet"); -+ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); -+ RuntimeAddress callback_addr(stub); ++// Accessors + -+ __ bind(entry->_stub_label); -+ InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); -+ masm.code_section()->relocate(masm.pc(), safepoint_pc.rspec()); -+ __ la(t0, safepoint_pc.target()); -+ __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); -+ __ far_jump(callback_addr); ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() && ++ unextended_sp() == other.unextended_sp() && ++ fp() == other.fp() && ++ pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; +} -+#undef __ -diff --git a/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp -new file mode 100644 -index 00000000000..14a68b45026 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_CODEBUFFER_RISCV_HPP -+#define CPU_RISCV_CODEBUFFER_RISCV_HPP ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
++inline intptr_t* frame::id(void) const { return unextended_sp(); } + -+private: -+ void pd_initialize() {} ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } + -+public: -+ void flush_bundle(bool start_new_bundle) {} ++inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } + -+#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -new file mode 100644 -index 00000000000..75bc4be7840 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -0,0 +1,149 @@ -+/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = (intptr_t **)addr_at(link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "code/compiledIC.hpp" -+#include "code/icBuffer.hpp" -+#include "code/nmethod.hpp" -+#include "memory/resourceArea.hpp" -+#include "runtime/mutexLocker.hpp" -+#include "runtime/safepoint.hpp" ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } + -+// ---------------------------------------------------------------------------- ++// Return address ++inline address* frame::sender_pc_addr() const { return (address*) addr_at(return_addr_offset); } ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); } + -+#define __ _masm. -+address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { -+ precond(cbuf.stubs()->start() != badAddress); -+ precond(cbuf.stubs()->end() != badAddress); -+ // Stub is fixed up when the corresponding call is converted from -+ // calling compiled code to calling interpreted code. -+ // mv xmethod, 0 -+ // jalr -4 # to self ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} + -+ if (mark == NULL) { -+ mark = cbuf.insts_mark(); // Get mark within main instrs section. 
-+ } ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} + -+ // Note that the code buffer's insts_mark is always relative to insts. -+ // That's why we must use the macroassembler to generate a stub. -+ MacroAssembler _masm(&cbuf); ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} + -+ address base = __ start_a_stub(to_interp_stub_size()); -+ int offset = __ offset(); -+ if (base == NULL) { -+ return NULL; // CodeBuffer::expand failed -+ } -+ // static stub relocation stores the instruction address of the call -+ __ relocate(static_stub_Relocation::spec(mark)); ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} + -+ __ emit_static_call_stub(); + -+ assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big"); -+ __ end_a_stub(); -+ return base; -+} -+#undef __ ++// Constant pool cache + -+int CompiledStaticCall::to_interp_stub_size() { -+ // fence_i + fence* + (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr -+ return NativeFenceI::instruction_size() + 12 * NativeInstruction::instruction_size; ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); +} + -+int CompiledStaticCall::to_trampoline_stub_size() { -+ // Somewhat pessimistically, we count 4 instructions here (although -+ // there are only 3) because we sometimes emit an alignment nop. -+ // Trampoline stubs are always word aligned. -+ return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; -+} ++// Method + -+// Relocation entries for call stub, compiled java to interpreter. -+int CompiledStaticCall::reloc_to_interp_stub() { -+ return 4; // 3 in emit_to_interp_stub + 1 in emit_call ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); +} + -+void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { -+ address stub = find_stub(); -+ guarantee(stub != NULL, "stub not found"); ++// Mirror + -+ if (TraceICs) { -+ ResourceMark rm; -+ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", -+ p2i(instruction_address()), -+ callee->name_and_sig_as_C_string()); ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL) { ++ return sp(); ++ } else { ++ // sp() may have been extended or shrunk by an adapter. At least ++ // check that we don't fall behind the legal region. ++ // For top deoptimized frame last_sp == interpreter_frame_monitor_end. ++ assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; + } ++} + -+ // Creation also verifies the object. 
-+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); -+#ifdef ASSERT -+ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} + -+ verify_mt_safe(callee, entry, method_holder, jump); -+#endif -+ // Update stub. -+ method_holder->set_data((intptr_t)callee()); -+ NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry); -+ ICache::invalidate_range(stub, to_interp_stub_size()); -+ // Update jump to call. -+ set_destination_mt_safe(stub); ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); +} + -+void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { -+ // Reset stub. -+ address stub = static_stub->addr(); -+ assert(stub != NULL, "stub not found"); -+ assert(CompiledICLocker::is_safe(stub), "mt unsafe call"); -+ // Creation also verifies the object. -+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); -+ method_holder->set_data(0); -+ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); -+ jump->set_jump_destination((address)-1); ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; +} + -+//----------------------------------------------------------------------------- -+// Non-product mode code -+#ifndef PRODUCT + -+void CompiledDirectStaticCall::verify() { -+ // Verify call. -+ _call->verify(); -+ _call->verify_alignment(); ++// Entry frames + -+ // Verify stub. -+ address stub = find_stub(); -+ assert(stub != NULL, "no stub found for static call"); -+ // Creation also verifies the object. -+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); -+ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} + -+ // Verify state. -+ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++ ++// Compiled frames ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ oop* result_adr = (oop *)map->location(x10->as_VMReg()); ++ guarantee(result_adr != NULL, "bad register save location"); ++ return (*result_adr); +} + -+#endif // !PRODUCT -diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ oop* result_adr = (oop *)map->location(x10->as_VMReg()); ++ guarantee(result_adr != NULL, "bad register save location"); ++ *result_adr = obj; ++} ++ ++#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp new file mode 100644 -index 00000000000..bceadcc5dcc +index 0000000000..e191cbcee2 --- /dev/null -+++ b/src/hotspot/cpu/riscv/copy_riscv.hpp -@@ -0,0 +1,136 @@ ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -0,0 +1,481 @@ +/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. 
All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13954,126 +13106,471 @@ index 00000000000..bceadcc5dcc + * + */ + -+#ifndef CPU_RISCV_COPY_RISCV_HPP -+#define CPU_RISCV_COPY_RISCV_HPP -+ -+#include OS_CPU_HEADER(copy) ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/g1/c1/g1BarrierSetC1.hpp" ++#endif + -+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { -+ julong* to = (julong*) tohw; -+ julong v = ((julong) value << 32) | value; -+ while (count-- > 0) { -+ *to++ = v; -+ } -+} ++#define __ masm-> + -+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { -+ pd_fill_to_words(tohw, count, value); -+} ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) { ++ assert_cond(masm != NULL); ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ if (!dest_uninitialized) { ++ Label done; ++ Address in_progress(xthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + -+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { -+ (void)memset(to, value, count); -+} ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lwu(t0, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(t0, in_progress); ++ } ++ __ beqz(t0, done); + -+static void pd_zero_to_words(HeapWord* tohw, size_t count) { -+ pd_fill_to_words(tohw, count, 0); -+} ++ __ push_reg(saved_regs, sp); ++ if (count == c_rarg0) { ++ if (addr == c_rarg1) { ++ // exactly backwards!! 
++ __ mv(t0, c_rarg0); ++ __ mv(c_rarg0, c_rarg1); ++ __ mv(c_rarg1, t0); ++ } else { ++ __ mv(c_rarg1, count); ++ __ mv(c_rarg0, addr); ++ } ++ } else { ++ __ mv(c_rarg0, addr); ++ __ mv(c_rarg1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ pop_reg(saved_regs, sp); + -+static void pd_zero_to_bytes(void* to, size_t count) { -+ (void)memset(to, 0, count); ++ __ bind(done); ++ } +} + -+static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ (void)memmove(to, from, count * HeapWordSize); ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs) { ++ assert_cond(masm != NULL); ++ __ push_reg(saved_regs, sp); ++ assert_different_registers(start, count, tmp); ++ assert_different_registers(c_rarg0, count); ++ __ mv(c_rarg0, start); ++ __ mv(c_rarg1, count); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ pop_reg(saved_regs, sp); +} + -+static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ switch (count) { -+ case 8: to[7] = from[7]; // fall through -+ case 7: to[6] = from[6]; // fall through -+ case 6: to[5] = from[5]; // fall through -+ case 5: to[4] = from[4]; // fall through -+ case 4: to[3] = from[3]; // fall through -+ case 3: to[2] = from[2]; // fall through -+ case 2: to[1] = from[1]; // fall through -+ case 1: to[0] = from[0]; // fall through -+ case 0: break; -+ default: -+ memcpy(to, from, count * HeapWordSize); -+ break; ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert_cond(masm != NULL); ++ assert(thread == xthread, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert_different_registers(obj, pre_val, tmp, t0); ++ assert(pre_val != noreg && tmp != noreg, "expecting a register"); ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width ++ __ lwu(tmp, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(tmp, in_progress); + } -+} ++ __ beqz(tmp, done); + -+static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { -+ shared_disjoint_words_atomic(from, to, count); -+} ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); ++ } + -+static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_conjoint_words(from, to, count); -+} ++ // Is the previous value null? 
++ __ beqz(pre_val, done); + -+static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words(from, to, count); -+} ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) + -+static void pd_conjoint_bytes(const void* from, void* to, size_t count) { -+ (void)memmove(to, from, count); -+} ++ __ ld(tmp, index); // tmp := *index_adr ++ __ beqz(tmp, runtime); // tmp == 0? ++ // If yes, goto runtime + -+static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { -+ pd_conjoint_bytes(from, to, count); -+} ++ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize ++ __ sd(tmp, index); // *index_adr := tmp ++ __ ld(t0, buffer); ++ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr + -+static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { -+ _Copy_conjoint_jshorts_atomic(from, to, count); -+} ++ // Record the previous value ++ __ sd(pre_val, Address(tmp, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ ++ __ push_call_clobbered_registers(); ++ if (expand_call) { ++ assert(pre_val != c_rarg1, "smashed arg"); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ __ pop_call_clobbered_registers(); ++ ++ __ bind(done); + -+static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { -+ _Copy_conjoint_jints_atomic(from, to, count); +} + -+static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { -+ _Copy_conjoint_jlongs_atomic(from, to, count); ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert_cond(masm != NULL); ++ assert(thread == xthread, "must be"); ++ assert_different_registers(store_addr, new_val, thread, tmp, tmp2, ++ t0); ++ assert(store_addr != noreg && new_val != noreg && tmp != noreg && ++ tmp2 != noreg, "expecting a register"); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ ++ __ xorr(tmp, store_addr, new_val); ++ __ srli(tmp, tmp, HeapRegion::LogOfHRGrainBytes); ++ __ beqz(tmp, done); ++ ++ // crosses regions, storing NULL? ++ ++ __ beqz(new_val, done); ++ ++ // storing region crossing non-NULL, is card already dirty? 
++ ++ ExternalAddress cardtable((address) ct->byte_map_base()); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ const Register card_addr = tmp; ++ ++ __ srli(card_addr, store_addr, CardTable::card_shift); ++ ++ // get the address of the card ++ __ load_byte_map_base(tmp2); ++ __ add(card_addr, card_addr, tmp2); ++ __ lbu(tmp2, Address(card_addr)); ++ __ mv(t0, (int)G1CardTable::g1_young_card_val()); ++ __ beq(tmp2, t0, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(MacroAssembler::StoreLoad); ++ ++ __ lbu(tmp2, Address(card_addr)); ++ __ beqz(tmp2, done); ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. ++ ++ __ sb(zr, Address(card_addr)); ++ ++ __ ld(t0, queue_index); ++ __ beqz(t0, runtime); ++ __ sub(t0, t0, wordSize); ++ __ sd(t0, queue_index); ++ ++ __ ld(tmp2, buffer); ++ __ add(t0, tmp2, t0); ++ __ sd(card_addr, Address(t0, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ RegSet saved = RegSet::of(store_addr, new_val); ++ __ push_reg(saved, sp); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ __ pop_reg(saved, sp); ++ ++ __ bind(done); +} + -+static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { -+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); -+ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ assert_cond(masm != NULL); ++ bool on_oop = is_reference_type(type); ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ // RA is live. It must be saved around calls. ++ __ enter(); // barrier may call runtime ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ leave(); ++ } +} + -+static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_bytes(from, to, count); ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ assert_cond(masm != NULL); ++ // flatten object address if needed ++ if (dst.offset() == 0) { ++ if (dst.base() != x13) { ++ __ mv(x13, dst.base()); ++ } ++ } else { ++ __ la(x13, dst); ++ } ++ ++ g1_write_barrier_pre(masm, ++ x13 /* obj */, ++ tmp2 /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); ++ } else { ++ // G1 barrier needs uncompressed oop for region cross check. 
++ Register new_val = val; ++ if (UseCompressedOops) { ++ new_val = t1; ++ __ mv(new_val, val); ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); ++ g1_write_barrier_post(masm, ++ x13 /* store_adr */, ++ new_val /* new_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); ++ } +} + -+static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jshorts(from, to, count); ++#ifdef COMPILER1 ++ ++#undef __ ++#define __ ce->masm()-> ++ ++void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. ++ __ bind(*stub->entry()); ++ ++ assert(stub->pre_val()->is_register(), "Precondition."); ++ ++ Register pre_val_reg = stub->pre_val()->as_register(); ++ ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); ++ } ++ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); ++ __ j(*stub->continuation()); +} + -+static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jints(from, to, count); ++void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ __ bind(*stub->entry()); ++ assert(stub->addr()->is_register(), "Precondition"); ++ assert(stub->new_val()->is_register(), "Precondition"); ++ Register new_val_reg = stub->new_val()->as_register(); ++ __ beqz(new_val_reg, *stub->continuation(), /* is_far */ true); ++ ce->store_parameter(stub->addr()->as_pointer_register(), 0); ++ __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin())); ++ __ j(*stub->continuation()); +} + -+static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jlongs(from, to, count); ++#undef __ ++ ++#define __ sasm-> ++ ++void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_pre_barrier", false); ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ // arg0 : previous value of memory ++ const Register pre_val = x10; ++ const Register thread = xthread; ++ const Register tmp = t0; ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ Label done; ++ Label runtime; ++ ++ // Is marking still active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width ++ __ lwu(tmp, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(tmp, in_progress); ++ } ++ __ beqz(tmp, done); ++ ++ // Can we store original value in the thread's buffer? 
++ __ ld(tmp, queue_index); ++ __ beqz(tmp, runtime); ++ ++ __ sub(tmp, tmp, wordSize); ++ __ sd(tmp, queue_index); ++ __ ld(t1, buffer); ++ __ add(tmp, tmp, t1); ++ __ load_parameter(0, t1); ++ __ sd(t1, Address(tmp, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ ++ __ epilogue(); +} + -+static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { -+ assert(!UseCompressedOops, "foo!"); -+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); -+ _Copy_arrayof_conjoint_jlongs(from, to, count); ++void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_post_barrier", false); ++ ++ // arg0 : store_address ++ Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // At this point we know new_value is non-NULL and the new_value crosses regions. ++ // Must check to see if card is already dirty ++ const Register thread = xthread; ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ const Register card_offset = t1; ++ // RA is free here, so we can use it to hold the byte_map_base. ++ const Register byte_map_base = ra; ++ ++ assert_different_registers(card_offset, byte_map_base, t0); ++ ++ __ load_parameter(0, card_offset); ++ __ srli(card_offset, card_offset, CardTable::card_shift); ++ __ load_byte_map_base(byte_map_base); ++ ++ // Convert card offset into an address in card_addr ++ Register card_addr = card_offset; ++ __ add(card_addr, byte_map_base, card_addr); ++ ++ __ lbu(t0, Address(card_addr, 0)); ++ __ sub(t0, t0, (int)G1CardTable::g1_young_card_val()); ++ __ beqz(t0, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(MacroAssembler::StoreLoad); ++ __ lbu(t0, Address(card_addr, 0)); ++ __ beqz(t0, done); ++ ++ // storing region crossing non-NULL, card is clean. ++ // dirty card and log. 
++ __ sb(zr, Address(card_addr, 0)); ++ ++ __ ld(t0, queue_index); ++ __ beqz(t0, runtime); ++ __ sub(t0, t0, wordSize); ++ __ sd(t0, queue_index); ++ ++ // Reuse RA to hold buffer_addr ++ const Register buffer_addr = ra; ++ ++ __ ld(buffer_addr, buffer); ++ __ add(t0, buffer_addr, t0); ++ __ sd(card_addr, Address(t0, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ __ epilogue(); +} + -+#endif // CPU_RISCV_COPY_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp ++#undef __ ++ ++#endif // COMPILER1 +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp new file mode 100644 -index 00000000000..b0e5560c906 +index 0000000000..37bc183f39 --- /dev/null -+++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp -@@ -0,0 +1,58 @@ ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp +@@ -0,0 +1,78 @@ +/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -14096,47 +13593,68 @@ index 00000000000..b0e5560c906 + * + */ + -+#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP -+#define CPU_RISCV_DISASSEMBLER_RISCV_HPP ++#ifndef CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP + -+static int pd_instruction_alignment() { -+ return 1; -+} ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++#include "utilities/macros.hpp" + -+static const char* pd_cpu_opts() { -+ return ""; -+} ++#ifdef COMPILER1 ++class LIR_Assembler; ++#endif ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; + -+// Returns address of n-th instruction preceding addr, -+// NULL if no preceding instruction can be found. -+// On riscv, we assume a constant instruction length. -+// It might be beneficial to check "is_readable" as we do on ppc and s390. -+static address find_prev_instr(address addr, int n_instr) { -+ return addr - Assembler::instruction_size * n_instr; -+} ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs); ++ void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs); + -+// special-case instruction decoding. -+// There may be cases where the binutils disassembler doesn't do -+// the perfect job. In those cases, decode_instruction0 may kick in -+// and do it right. 
-+// If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" -+static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { -+ return here; -+} ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); + -+// platform-specific instruction annotations (like value of loaded constants) -+static void annotate(address pc, outputStream* st) {} ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); + -+#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp ++public: ++#ifdef COMPILER1 ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++#endif ++ ++ void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp new file mode 100644 -index 00000000000..5c700be9c91 +index 0000000000..8735fd014f --- /dev/null -+++ b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp -@@ -0,0 +1,44 @@ ++++ b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp +@@ -0,0 +1,31 @@ +/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -14159,73 +13677,21 @@ index 00000000000..5c700be9c91 + * + */ + -+#include "precompiled.hpp" -+#include "prims/foreign_globals.hpp" -+#include "utilities/debug.hpp" -+ -+// Stubbed out, implement later -+const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { -+ Unimplemented(); -+ return {}; -+} ++#ifndef CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP ++#define CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP + -+const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { -+ Unimplemented(); -+ return {}; -+} ++const size_t G1MergeHeapRootsPrefetchCacheSize = 16; + -+const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { -+ ShouldNotCallThis(); -+ return {}; -+} -diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp ++#endif // CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp new file mode 100644 -index 00000000000..3ac89752c27 +index 0000000000..2b556b95d7 --- /dev/null -+++ b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp -@@ -0,0 +1,32 @@ ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +@@ -0,0 +1,231 @@ +/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP -+#define CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP -+ -+class ABIDescriptor {}; -+class BufferLayout {}; -+ -+#endif // CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -new file mode 100644 -index 00000000000..6e38960598a ---- /dev/null -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -0,0 +1,697 @@ -+/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -14249,1147 +13715,1447 @@ index 00000000000..6e38960598a + */ + +#include "precompiled.hpp" -+#include "compiler/oopMap.hpp" -+#include "interpreter/interpreter.hpp" -+#include "memory/resourceArea.hpp" ++#include "classfile/classLoaderData.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" +#include "memory/universe.hpp" -+#include "oops/markWord.hpp" -+#include "oops/method.hpp" -+#include "oops/oop.inline.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/handles.inline.hpp" -+#include "runtime/javaCalls.hpp" -+#include "runtime/monitorChunk.hpp" -+#include "runtime/os.inline.hpp" -+#include "runtime/signature.hpp" -+#include "runtime/stackWatermarkSet.hpp" -+#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" -+#include "vmreg_riscv.inline.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_Runtime1.hpp" -+#include "runtime/vframeArray.hpp" -+#endif ++#include "runtime/thread.hpp" + -+#ifdef ASSERT -+void RegisterMap::check_location_valid() { -+} -+#endif ++#define __ masm-> + ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ assert_cond(masm != NULL); + -+// Profiling/safepoint support ++ // RA is live. It must be saved around calls. + -+bool frame::safe_for_sender(JavaThread *thread) { -+ address addr_sp = (address)_sp; -+ address addr_fp = (address)_fp; -+ address unextended_sp = (address)_unextended_sp; ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ switch (type) { ++ case T_OBJECT: // fall through ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ lwu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else { ++ __ ld(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ load_unsigned_byte (dst, src); break; ++ case T_BYTE: __ load_signed_byte (dst, src); break; ++ case T_CHAR: __ load_unsigned_short(dst, src); break; ++ case T_SHORT: __ load_signed_short (dst, src); break; ++ case T_INT: __ lw (dst, src); break; ++ case T_LONG: __ ld (dst, src); break; ++ case T_ADDRESS: __ ld (dst, src); break; ++ case T_FLOAT: __ flw (f10, src); break; ++ case T_DOUBLE: __ fld (f10, src); break; ++ default: Unimplemented(); ++ } ++} + -+ // consider stack guards when trying to determine "safe" stack pointers -+ // sp must be within the usable part of the stack (not in guards) -+ if (!thread->is_in_usable_stack(addr_sp)) { -+ return false; ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ assert_cond(masm != NULL); ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ switch (type) { ++ case T_OBJECT: // fall through ++ case T_ARRAY: { ++ val = val == noreg ? 
zr : val; ++ if (in_heap) { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (val != zr) { ++ __ encode_heap_oop(val); ++ } ++ __ sw(val, dst); ++ } else { ++ __ sd(val, dst); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ sd(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ sb(val, dst); ++ break; ++ case T_BYTE: __ sb(val, dst); break; ++ case T_CHAR: __ sh(val, dst); break; ++ case T_SHORT: __ sh(val, dst); break; ++ case T_INT: __ sw(val, dst); break; ++ case T_LONG: __ sd(val, dst); break; ++ case T_ADDRESS: __ sd(val, dst); break; ++ case T_FLOAT: __ fsw(f10, dst); break; ++ case T_DOUBLE: __ fsd(f10, dst); break; ++ default: Unimplemented(); + } + -+ // When we are running interpreted code the machine stack pointer, SP, is -+ // set low enough so that the Java expression stack can grow and shrink -+ // without ever exceeding the machine stack bounds. So, ESP >= SP. ++} + -+ // When we call out of an interpreted method, SP is incremented so that -+ // the space between SP and ESP is removed. The SP saved in the callee's -+ // frame is the SP *before* this increment. So, when we walk a stack of -+ // interpreter frames the sender's SP saved in a frame might be less than -+ // the SP at the point of call. ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ assert_cond(masm != NULL); ++ // If mask changes we need to ensure that the inverse is still encodable as an immediate ++ STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); ++ __ andi(obj, obj, ~JNIHandles::weak_tag_mask); ++ __ ld(obj, Address(obj, 0)); // *obj ++} + -+ // So unextended sp must be within the stack but we need not to check -+ // that unextended sp >= sp ++// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. ++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Register tmp2, ++ Label& slow_case, ++ bool is_far) { ++ assert_cond(masm != NULL); ++ assert_different_registers(obj, tmp2); ++ assert_different_registers(obj, var_size_in_bytes); ++ Register end = tmp2; + -+ if (!thread->is_in_full_stack_checked(unextended_sp)) { -+ return false; ++ __ ld(obj, Address(xthread, JavaThread::tlab_top_offset())); ++ if (var_size_in_bytes == noreg) { ++ __ la(end, Address(obj, con_size_in_bytes)); ++ } else { ++ __ add(end, obj, var_size_in_bytes); + } ++ __ ld(t0, Address(xthread, JavaThread::tlab_end_offset())); ++ __ bgtu(end, t0, slow_case, is_far); + -+ // an fp must be within the stack and above (but not equal) sp -+ // second evaluation on fp+ is added to handle situation where fp is -1 -+ bool fp_safe = thread->is_in_stack_range_excl(addr_fp, addr_sp) && -+ thread->is_in_full_stack_checked(addr_fp + (return_addr_offset * sizeof(void*))); -+ -+ // We know sp/unextended_sp are safe only fp is questionable here -+ -+ // If the current frame is known to the code cache then we can attempt to -+ // to construct the sender and do some validation of it. 
This goes a long way -+ // toward eliminating issues when we get in frame construction code ++ // update the tlab top pointer ++ __ sd(end, Address(xthread, JavaThread::tlab_top_offset())); + -+ if (_cb != NULL) { ++ // recover var_size_in_bytes if necessary ++ if (var_size_in_bytes == end) { ++ __ sub(var_size_in_bytes, var_size_in_bytes, obj); ++ } ++} + -+ // First check if frame is complete and tester is reliable -+ // Unfortunately we can only check frame complete for runtime stubs and nmethod -+ // other generic buffer blobs are more problematic so we just assume they are -+ // ok. adapter blobs never have a frame complete and are never ok. ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Label& slow_case, ++ bool is_far) { ++ assert_cond(masm != NULL); ++ assert_different_registers(obj, var_size_in_bytes, tmp1); ++ if (!Universe::heap()->supports_inline_contig_alloc()) { ++ __ j(slow_case); ++ } else { ++ Register end = tmp1; ++ Label retry; ++ __ bind(retry); + -+ if (!_cb->is_frame_complete_at(_pc)) { -+ if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { -+ return false; -+ } ++ // Get the current end of the heap ++ ExternalAddress address_end((address) Universe::heap()->end_addr()); ++ { ++ int32_t offset; ++ __ la_patchable(t1, address_end, offset); ++ __ ld(t1, Address(t1, offset)); + } + -+ // Could just be some random pointer within the codeBlob -+ if (!_cb->code_contains(_pc)) { -+ return false; ++ // Get the current top of the heap ++ ExternalAddress address_top((address) Universe::heap()->top_addr()); ++ { ++ int32_t offset; ++ __ la_patchable(t0, address_top, offset); ++ __ addi(t0, t0, offset); ++ __ lr_d(obj, t0, Assembler::aqrl); + } + -+ // Entry frame checks -+ if (is_entry_frame()) { -+ // an entry frame must have a valid fp. -+ return fp_safe && is_entry_frame_valid(thread); ++ // Adjust it my the size of our new object ++ if (var_size_in_bytes == noreg) { ++ __ la(end, Address(obj, con_size_in_bytes)); ++ } else { ++ __ add(end, obj, var_size_in_bytes); + } + -+ intptr_t* sender_sp = NULL; -+ intptr_t* sender_unextended_sp = NULL; -+ address sender_pc = NULL; -+ intptr_t* saved_fp = NULL; -+ -+ if (is_interpreted_frame()) { -+ // fp must be safe -+ if (!fp_safe) { -+ return false; -+ } -+ -+ sender_pc = (address)this->fp()[return_addr_offset]; -+ // for interpreted frames, the value below is the sender "raw" sp, -+ // which can be different from the sender unextended sp (the sp seen -+ // by the sender) because of current frame local variables -+ sender_sp = (intptr_t*) addr_at(sender_sp_offset); -+ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; -+ saved_fp = (intptr_t*) this->fp()[link_offset]; -+ } else { -+ // must be some sort of compiled/runtime frame -+ // fp does not have to be safe (although it could be check for c1?) ++ // if end < obj then we wrapped around high memory ++ __ bltu(end, obj, slow_case, is_far); + -+ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc -+ if (_cb->frame_size() <= 0) { -+ return false; -+ } ++ __ bgtu(end, t1, slow_case, is_far); + -+ sender_sp = _unextended_sp + _cb->frame_size(); -+ // Is sender_sp safe? -+ if (!thread->is_in_full_stack_checked((address)sender_sp)) { -+ return false; -+ } ++ // If heap_top hasn't been changed by some other thread, update it. 
++ __ sc_d(t1, end, t0, Assembler::rl); ++ __ bnez(t1, retry); + -+ sender_unextended_sp = sender_sp; -+ sender_pc = (address) *(sender_sp - 1); -+ saved_fp = (intptr_t*) *(sender_sp - 2); -+ } ++ incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1); ++ } ++} + ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1) { ++ assert_cond(masm != NULL); ++ assert(tmp1->is_valid(), "need temp reg"); + -+ // If the potential sender is the interpreter then we can do some more checking -+ if (Interpreter::contains(sender_pc)) { ++ __ ld(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); ++ if (var_size_in_bytes->is_valid()) { ++ __ add(tmp1, tmp1, var_size_in_bytes); ++ } else { ++ __ add(tmp1, tmp1, con_size_in_bytes); ++ } ++ __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); ++} +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +new file mode 100644 +index 0000000000..984d94f4c3 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +@@ -0,0 +1,76 @@ ++/* ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // fp is always saved in a recognizable place in any code we generate. However -+ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp -+ // is really a frame pointer. 
-+ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { -+ return false; -+ } ++#ifndef CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP + -+ // construct the potential sender -+ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "memory/allocation.hpp" ++#include "oops/access.hpp" + -+ return sender.is_interpreted_frame_valid(thread); -+ } ++class BarrierSetAssembler: public CHeapObj { ++private: ++ void incr_allocated_bytes(MacroAssembler* masm, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); + -+ // We must always be able to find a recognizable pc -+ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); -+ if (sender_pc == NULL || sender_blob == NULL) { -+ return false; -+ } ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) {} ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register start, Register end, Register tmp, RegSet saved_regs) {} ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); + -+ // Could be a zombie method -+ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { -+ return false; -+ } ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); + -+ // Could just be some random pointer within the codeBlob -+ if (!sender_blob->code_contains(sender_pc)) { -+ return false; -+ } ++ virtual void tlab_allocate(MacroAssembler* masm, ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp1, // temp register ++ Register tmp2, // temp register ++ Label& slow_case, // continuation point if fast allocation fails ++ bool is_far = false ++ ); + -+ // We should never be able to see an adapter if the current frame is something from code cache -+ if (sender_blob->is_adapter_blob()) { -+ return false; -+ } ++ void eden_allocate(MacroAssembler* masm, ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp1, // temp register ++ Label& slow_case, // continuation point if fast allocation fails ++ bool is_far = false ++ ); ++ virtual void barrier_stubs_init() {} + -+ // Could be the call_stub -+ if (StubRoutines::returns_to_call_stub(sender_pc)) { -+ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { -+ return false; -+ } ++ virtual ~BarrierSetAssembler() {} ++}; + -+ // construct the potential sender -+ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp 
b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +new file mode 100644 +index 0000000000..81d47d61d4 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // Validate the JavaCallWrapper an entry frame must have -+ address jcw = (address)sender.entry_frame_call_wrapper(); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "interpreter/interp_masm.hpp" + -+ bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); ++#define __ masm-> + -+ return jcw_safe; -+ } + -+ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); -+ if (nm != NULL) { -+ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || -+ nm->method()->is_method_handle_intrinsic()) { -+ return false; -+ } -+ } ++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) { ++ assert_cond(masm != NULL); ++ assert_different_registers(obj, tmp); ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); + -+ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size -+ // because the return address counts against the callee's frame. -+ if (sender_blob->frame_size() <= 0) { -+ assert(!sender_blob->is_compiled(), "should count return address at least"); -+ return false; -+ } ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + -+ // We should never be able to see anything here except an nmethod. If something in the -+ // code cache (current frame) is called by an entity within the code cache that entity -+ // should not be anything but the call stub (already covered), the interpreter (already covered) -+ // or an nmethod. 
-+ if (!sender_blob->is_compiled()) { -+ return false; -+ } ++ __ srli(obj, obj, CardTable::card_shift); + -+ // Could put some more validation for the potential non-interpreted sender -+ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... ++ assert(CardTable::dirty_card_val() == 0, "must be"); + -+ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ __ load_byte_map_base(tmp); ++ __ add(tmp, obj, tmp); + -+ // We've validated the potential sender that would be created -+ return true; ++ if (UseCondCardMark) { ++ Label L_already_dirty; ++ __ membar(MacroAssembler::StoreLoad); ++ __ lbu(t1, Address(tmp)); ++ __ beqz(t1, L_already_dirty); ++ __ sb(zr, Address(tmp)); ++ __ bind(L_already_dirty); ++ } else { ++ if (ct->scanned_concurrently()) { ++ __ membar(MacroAssembler::StoreStore); ++ } ++ __ sb(zr, Address(tmp)); + } ++} + -+ // Must be native-compiled frame. Since sender will try and use fp to find -+ // linkages it must be safe -+ if (!fp_safe) { -+ return false; -+ } ++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs) { ++ assert_cond(masm != NULL); ++ assert_different_registers(start, tmp); ++ assert_different_registers(count, tmp); + -+ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) -+ if ((address)this->fp()[return_addr_offset] == NULL) { return false; } ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); + -+ return true; -+} ++ Label L_loop, L_done; ++ const Register end = count; + -+void frame::patch_pc(Thread* thread, address pc) { -+ assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); -+ address* pc_addr = &(((address*) sp())[-1]); -+ if (TracePcPatching) { -+ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", -+ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); -+ } -+ // Either the return address is the original one or we are going to -+ // patch in the same address that's already there. 
-+ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); -+ *pc_addr = pc; -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ assert(original_pc == _pc, "expected original PC to be stored before patching"); -+ _deopt_state = is_deoptimized; -+ // leave _pc as is -+ } else { -+ _deopt_state = not_deoptimized; -+ _pc = pc; ++ __ beqz(count, L_done); // zero count - nothing to do ++ // end = start + count << LogBytesPerHeapOop ++ __ shadd(end, count, start, count, LogBytesPerHeapOop); ++ __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive ++ ++ __ srli(start, start, CardTable::card_shift); ++ __ srli(end, end, CardTable::card_shift); ++ __ sub(count, end, start); // number of bytes to copy ++ ++ __ load_byte_map_base(tmp); ++ __ add(start, start, tmp); ++ if (ct->scanned_concurrently()) { ++ __ membar(MacroAssembler::StoreStore); + } -+} + -+bool frame::is_interpreted_frame() const { -+ return Interpreter::contains(pc()); ++ __ bind(L_loop); ++ __ add(tmp, start, count); ++ __ sb(zr, Address(tmp)); ++ __ sub(count, count, 1); ++ __ bgez(count, L_loop); ++ __ bind(L_done); +} + -+int frame::frame_size(RegisterMap* map) const { -+ frame sender = this->sender(map); -+ return sender.sp() - sp(); -+} ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; + -+intptr_t* frame::entry_frame_argument_at(int offset) const { -+ // convert offset to index to deal with tsi -+ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); -+ // Entry frame's arguments are always in relation to unextended_sp() -+ return &unextended_sp()[index]; ++ bool needs_post_barrier = val != noreg && in_heap; ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || dst.offset() == 0) { ++ store_check(masm, dst.base(), x13); ++ } else { ++ assert_cond(masm != NULL); ++ __ la(x13, dst); ++ store_check(masm, x13, t0); ++ } ++ } +} +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp +new file mode 100644 +index 0000000000..686fe8fa47 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+// sender_sp -+intptr_t* frame::interpreter_frame_sender_sp() const { -+ assert(is_interpreted_frame(), "interpreted frame expected"); -+ return (intptr_t*) at(interpreter_frame_sender_sp_offset); -+} ++#ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP + -+void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { -+ assert(is_interpreted_frame(), "interpreted frame expected"); -+ ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); -+} ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" + ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Register tmp); + -+// monitor elements ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs); ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; + -+BasicObjectLock* frame::interpreter_frame_monitor_begin() const { -+ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); -+} ++#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp +new file mode 100644 +index 0000000000..7aa2015f9e +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+BasicObjectLock* frame::interpreter_frame_monitor_end() const { -+ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); -+ // make sure the pointer points inside the frame -+ assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer"); -+ assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer"); -+ return result; -+} ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" + -+void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { -+ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; -+} ++#define __ masm-> + -+// Used by template based interpreter deoptimization -+void frame::interpreter_frame_set_last_sp(intptr_t* last_sp) { -+ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = last_sp; -+} ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) { + -+frame frame::sender_for_entry_frame(RegisterMap* map) const { -+ assert(map != NULL, "map must be set"); -+ // Java frame called from C; skip all C frames and return top C -+ // frame of that chunk as the sender -+ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); -+ assert(!entry_frame_is_first(), "next Java fp must be non zero"); -+ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); -+ // Since we are walking the stack now this nested anchor is obviously walkable -+ // even if it wasn't when it was stacked. -+ if (!jfa->walkable()) { -+ // Capture _last_Java_pc (if needed) and mark anchor walkable. -+ jfa->capture_last_Java_pc(); ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); + } -+ map->clear(); -+ assert(map->include_argument_oops(), "should be set by clear"); -+ vmassert(jfa->last_Java_pc() != NULL, "not walkable"); -+ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); -+ return fr; +} + -+OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { -+ ShouldNotCallThis(); -+ return nullptr; ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register start, Register count, Register tmp, ++ RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp, saved_regs); ++ } +} + -+bool frame::optimized_entry_frame_is_first() const { -+ ShouldNotCallThis(); -+ return false; ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (is_reference_type(type)) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } +} +diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp +new file mode 100644 +index 0000000000..00419c3163 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { -+ ShouldNotCallThis(); -+ return {}; -+} ++#ifndef CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP + -+//------------------------------------------------------------------------------ -+// frame::verify_deopt_original_pc -+// -+// Verifies the calculated original PC of a deoptimization PC for the -+// given unextended SP. -+#ifdef ASSERT -+void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { -+ frame fr; ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" + -+ // This is ugly but it's better than to change {get,set}_original_pc -+ // to take an SP value as argument. And it's only a debugging -+ // method anyway. -+ fr._unextended_sp = unextended_sp; ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. + -+ assert_cond(nm != NULL); -+ address original_pc = nm->get_original_pc(&fr); -+ assert(nm->insts_contains_inclusive(original_pc), -+ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); -+} -+#endif ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs) {} + -+//------------------------------------------------------------------------------ -+// frame::adjust_unextended_sp -+void frame::adjust_unextended_sp() { -+ // On riscv, sites calling method handle intrinsics and lambda forms are treated -+ // as any other call site. Therefore, no special action is needed when we are -+ // returning to any of these call sites. ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; + -+ if (_cb != NULL) { -+ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); -+ if (sender_cm != NULL) { -+ // If the sender PC is a deoptimization point, get the original PC. 
-+ if (sender_cm->is_deopt_entry(_pc) || -+ sender_cm->is_deopt_mh_entry(_pc)) { -+ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); -+ } -+ } -+ } -+} ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register start, Register count, Register tmp, RegSet saved_regs); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; + -+//------------------------------------------------------------------------------ -+// frame::update_map_with_saved_link -+void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { -+ // The interpreter and compiler(s) always save fp in a known -+ // location on entry. We must record where that location is -+ // so that if fp was live on callout from c2 we can find -+ // the saved copy no matter what it called. ++#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +new file mode 100644 +index 0000000000..d19f5b859c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +@@ -0,0 +1,117 @@ ++/* ++ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // Since the interpreter always saves fp if we record where it is then -+ // we don't have to always save fp on entry and exit to c2 compiled -+ // code, on entry will be enough. -+ assert(map != NULL, "map must be set"); -+ map->set_location(::fp->as_VMReg(), (address) link_addr); -+ // this is weird "H" ought to be at a higher address however the -+ // oopMaps seems to have the "H" regs at the same address and the -+ // vanilla register. 
-+ map->set_location(::fp->as_VMReg()->next(), (address) link_addr); -+} ++#include "precompiled.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" + ++#define __ masm->masm()-> + -+//------------------------------------------------------------------------------ -+// frame::sender_for_interpreter_frame -+frame frame::sender_for_interpreter_frame(RegisterMap* map) const { -+ // SP is the raw SP from the sender after adapter or interpreter -+ // extension. -+ intptr_t* sender_sp = this->sender_sp(); ++void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { ++ Register addr = _addr->as_register_lo(); ++ Register newval = _new_value->as_register(); ++ Register cmpval = _cmp_value->as_register(); ++ Register tmp1 = _tmp1->as_register(); ++ Register tmp2 = _tmp2->as_register(); ++ Register result = result_opr()->as_register(); + -+ // This is the sp before any possible extension (adapter/locals). -+ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), newval, t1); + -+#ifdef COMPILER2 -+ assert(map != NULL, "map must be set"); -+ if (map->update_map()) { -+ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ if (UseCompressedOops) { ++ __ encode_heap_oop(tmp1, cmpval); ++ cmpval = tmp1; ++ __ encode_heap_oop(tmp2, newval); ++ newval = tmp2; + } -+#endif // COMPILER2 + -+ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq, ++ /* release */ Assembler::rl, /* is_cae */ false, result); +} + ++#undef __ + -+//------------------------------------------------------------------------------ -+// frame::sender_for_compiled_frame -+frame frame::sender_for_compiled_frame(RegisterMap* map) const { -+ // we cannot rely upon the last fp having been saved to the thread -+ // in C2 code but it will have been pushed onto the stack. 
so we -+ // have to find it relative to the unextended sp -+ -+ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); -+ intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size(); -+ intptr_t* unextended_sp = l_sender_sp; ++#ifdef ASSERT ++#define __ gen->lir(__FILE__, __LINE__)-> ++#else ++#define __ gen->lir()-> ++#endif + -+ // the return_address is always the word on the stack -+ address sender_pc = (address) *(l_sender_sp + frame::return_addr_offset); ++LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) { ++ BasicType bt = access.type(); ++ if (access.is_oop()) { ++ LIRGenerator *gen = access.gen(); ++ if (ShenandoahSATBBarrier) { ++ pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(), ++ LIR_OprFact::illegalOpr /* pre_val */); ++ } ++ if (ShenandoahCASBarrier) { ++ cmp_value.load_item(); ++ new_value.load_item(); + -+ intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp + frame::link_offset); ++ LIR_Opr tmp1 = gen->new_register(T_OBJECT); ++ LIR_Opr tmp2 = gen->new_register(T_OBJECT); ++ LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base(); ++ LIR_Opr result = gen->new_register(T_INT); + -+ assert(map != NULL, "map must be set"); -+ if (map->update_map()) { -+ // Tell GC to use argument oopmaps for some runtime stubs that need it. -+ // For C1, the runtime stub might not have oop maps, so set this flag -+ // outside of update_register_map. -+ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); -+ if (_cb->oop_maps() != NULL) { -+ OopMapSet::update_register_map(this, map); ++ __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), tmp1, tmp2, result)); ++ return result; + } -+ -+ // Since the prolog does the save and restore of FP there is no -+ // oopmap for it so we must fill in its location as if there was -+ // an oopmap entry since if our caller was compiled code there -+ // could be live jvm state in it. -+ update_map_with_saved_link(map, saved_fp_addr); + } -+ -+ return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++ return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value); +} + -+//------------------------------------------------------------------------------ -+// frame::sender_raw -+frame frame::sender_raw(RegisterMap* map) const { -+ // Default is we done have to follow them. The sender_for_xxx will -+ // update it accordingly -+ assert(map != NULL, "map must be set"); -+ map->set_include_argument_oops(false); ++LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) { ++ LIRGenerator* gen = access.gen(); ++ BasicType type = access.type(); + -+ if (is_entry_frame()) { -+ return sender_for_entry_frame(map); -+ } -+ if (is_interpreted_frame()) { -+ return sender_for_interpreter_frame(map); -+ } -+ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ LIR_Opr result = gen->new_register(type); ++ value.load_item(); ++ LIR_Opr value_opr = value.result(); + -+ // This test looks odd: why is it not is_compiled_frame() ? That's -+ // because stubs also have OOP maps. -+ if (_cb != NULL) { -+ return sender_for_compiled_frame(map); ++ if (access.is_oop()) { ++ value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators()); + } + -+ // Must be native-compiled frame, i.e. the marshaling code for native -+ // methods that exists in the core system. 
-+ return frame(sender_sp(), link(), sender_pc()); -+} -+ -+frame frame::sender(RegisterMap* map) const { -+ frame result = sender_raw(map); ++ assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type"); ++ LIR_Opr tmp = gen->new_register(T_INT); ++ __ xchg(access.resolved_addr(), value_opr, result, tmp); + -+ if (map->process_frames()) { -+ StackWatermarkSet::on_iteration(map->thread(), result); ++ if (access.is_oop()) { ++ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0)); ++ LIR_Opr tmp_opr = gen->new_register(type); ++ __ move(result, tmp_opr); ++ result = tmp_opr; ++ if (ShenandoahSATBBarrier) { ++ pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr, ++ result /* pre_val */); ++ } + } + + return result; +} +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +new file mode 100644 +index 0000000000..b8534c52e7 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,715 @@ ++/* ++ * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+bool frame::is_interpreted_frame_valid(JavaThread* thread) const { -+ assert(is_interpreted_frame(), "Not an interpreted frame"); -+ // These are reasonable sanity checks -+ if (fp() == NULL || (intptr_t(fp()) & (wordSize-1)) != 0) { -+ return false; -+ } -+ if (sp() == NULL || (intptr_t(sp()) & (wordSize-1)) != 0) { -+ return false; -+ } -+ if (fp() + interpreter_frame_initial_sp_offset < sp()) { -+ return false; -+ } -+ // These are hacks to keep us out of trouble. 
-+ // The problem with these is that they mask other problems -+ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above -+ return false; -+ } -+ -+ // do some validation of frame elements -+ -+ // first the method -+ Method* m = *interpreter_frame_method_addr(); -+ // validate the method we'd find in this potential sender -+ if (!Method::is_valid_method(m)) { -+ return false; -+ } ++#include "precompiled.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++#include "gc/shenandoah/shenandoahForwarding.hpp" ++#include "gc/shenandoah/shenandoahHeap.hpp" ++#include "gc/shenandoah/shenandoahHeapRegion.hpp" ++#include "gc/shenandoah/shenandoahRuntime.hpp" ++#include "gc/shenandoah/shenandoahThreadLocalData.hpp" ++#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" ++#endif + -+ // stack frames shouldn't be much larger than max_stack elements -+ // this test requires the use of unextended_sp which is the sp as seen by -+ // the current frame, and not sp which is the "raw" pc which could point -+ // further because of local variables of the callee method inserted after -+ // method arguments -+ if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { -+ return false; -+ } ++#define __ masm-> + -+ // validate bci/bcx -+ address bcp = interpreter_frame_bcp(); -+ if (m->validate_bci_from_bcp(bcp) < 0) { -+ return false; -+ } ++address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; + -+ // validate constantPoolCache* -+ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); -+ if (MetaspaceObj::is_valid(cp) == false) { -+ return false; -+ } ++void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) { ++ if (is_oop) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { + -+ // validate locals -+ address locals = (address) *interpreter_frame_locals_addr(); -+ if (locals > thread->stack_base() || locals < (address) fp()) { -+ return false; -+ } ++ Label done; + -+ // We'd have to be pretty unlucky to be mislead at this point -+ return true; -+} ++ // Avoid calling runtime if count == 0 ++ __ beqz(count, done); + -+BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { -+ assert(is_interpreted_frame(), "interpreted frame expected"); -+ Method* method = interpreter_frame_method(); -+ BasicType type = method->result_type(); ++ // Is GC active? ++ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ assert_different_registers(src, dst, count, t0); + -+ intptr_t* tos_addr = NULL; -+ if (method->is_native()) { -+ tos_addr = (intptr_t*)sp(); -+ if (type == T_FLOAT || type == T_DOUBLE) { -+ // This is because we do a push(ltos) after push(dtos) in generate_native_entry. 
-+ tos_addr += 2 * Interpreter::stackElementWords; -+ } -+ } else { -+ tos_addr = (intptr_t*)interpreter_frame_tos_address(); -+ } ++ __ lbu(t0, gc_state); ++ if (ShenandoahSATBBarrier && dest_uninitialized) { ++ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t0, done); ++ } else { ++ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING); ++ __ beqz(t0, done); ++ } + -+ switch (type) { -+ case T_OBJECT : -+ case T_ARRAY : { -+ oop obj; -+ if (method->is_native()) { -+ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++ __ push_reg(saved_regs, sp); ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), ++ src, dst, count); + } else { -+ oop* obj_p = (oop*)tos_addr; -+ obj = (obj_p == NULL) ? (oop)NULL : *obj_p; ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count); + } -+ assert(Universe::is_in_heap_or_null(obj), "sanity check"); -+ *oop_result = obj; -+ break; -+ } -+ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; -+ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; -+ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; -+ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; -+ case T_INT : value_result->i = *(jint*)tos_addr; break; -+ case T_LONG : value_result->j = *(jlong*)tos_addr; break; -+ case T_FLOAT : { -+ value_result->f = *(jfloat*)tos_addr; -+ break; ++ __ pop_reg(saved_regs, sp); ++ __ bind(done); + } -+ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; -+ case T_VOID : /* Nothing to do */ break; -+ default : ShouldNotReachHere(); + } -+ -+ return type; -+} -+ -+ -+intptr_t* frame::interpreter_frame_tos_at(jint offset) const { -+ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); -+ return &interpreter_frame_tos_address()[index]; +} + -+#ifndef PRODUCT -+ -+#define DESCRIBE_FP_OFFSET(name) \ -+ values.describe(frame_no, fp() + frame::name##_offset, #name) -+ -+void frame::describe_pd(FrameValues& values, int frame_no) { -+ if (is_interpreted_frame()) { -+ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_method); -+ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); -+ DESCRIBE_FP_OFFSET(interpreter_frame_cache); -+ DESCRIBE_FP_OFFSET(interpreter_frame_locals); -+ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ if (ShenandoahSATBBarrier) { ++ satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); + } +} -+#endif -+ -+intptr_t *frame::initial_deoptimization_info() { -+ // Not used on riscv, but we must return something. 
-+ return NULL; -+} + -+intptr_t* frame::real_fp() const { -+ if (_cb != NULL) { -+ // use the frame size if valid -+ int size = _cb->frame_size(); -+ if (size > 0) { -+ return unextended_sp() + size; -+ } -+ } -+ // else rely on fp() -+ assert(!is_compiled_frame(), "unknown compiled frame size"); -+ return fp(); -+} ++void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ assert(thread == xthread, "must be"); + -+#undef DESCRIBE_FP_OFFSET ++ Label done; ++ Label runtime; + -+#ifndef PRODUCT -+// This is a generic constructor which is only used by pns() in debug.cpp. -+frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { -+ init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); -+} ++ assert_different_registers(obj, pre_val, tmp, t0); ++ assert(pre_val != noreg && tmp != noreg, "expecting a register"); + -+#endif ++ Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + -+void JavaFrameAnchor::make_walkable(JavaThread* thread) { -+ // last frame set? -+ if (last_Java_sp() == NULL) { return; } -+ // already walkable? -+ if (walkable()) { return; } -+ vmassert(Thread::current() == (Thread*)thread, "not current thread"); -+ vmassert(last_Java_sp() != NULL, "not called from Java code?"); -+ vmassert(last_Java_pc() == NULL, "already walkable"); -+ capture_last_Java_pc(); -+ vmassert(walkable(), "something went wrong"); -+} ++ // Is marking active? ++ if (in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lwu(tmp, in_progress); ++ } else { ++ assert(in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(tmp, in_progress); ++ } ++ __ beqz(tmp, done); + -+void JavaFrameAnchor::capture_last_Java_pc() { -+ vmassert(_last_Java_sp != NULL, "no last frame set"); -+ vmassert(_last_Java_pc == NULL, "already walkable"); -+ _last_Java_pc = (address)_last_Java_sp[-1]; -+} -diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp -new file mode 100644 -index 00000000000..c06aaa9e391 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/frame_riscv.hpp -@@ -0,0 +1,202 @@ -+/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). 
-+ *
-+ * You should have received a copy of the GNU General Public License version
-+ * 2 along with this work; if not, write to the Free Software Foundation,
-+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-+ *
-+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-+ * or visit www.oracle.com if you need additional information or have any
-+ * questions.
-+ *
-+ */
++ // Do we need to load the previous value?
++ if (obj != noreg) {
++ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
++ }
+
-+#ifndef CPU_RISCV_FRAME_RISCV_HPP
-+#define CPU_RISCV_FRAME_RISCV_HPP
++ // Is the previous value null?
++ __ beqz(pre_val, done);
+
-+#include "runtime/synchronizer.hpp"
++ // Can we store original value in the thread's buffer?
++ // Is index == 0?
++ // (The index field is typed as size_t.)
++ __ ld(tmp, index); // tmp := *index_adr
++ __ beqz(tmp, runtime); // tmp == 0? If yes, goto runtime
+
-+// A frame represents a physical stack frame (an activation). Frames can be
-+// C or Java frames, and the Java frames can be interpreted or compiled.
-+// In contrast, vframes represent source-level activations, so that one physical frame
-+// can correspond to multiple source level frames because of inlining.
-+// A frame is comprised of {pc, fp, sp}
-+// ------------------------------ Asm interpreter ----------------------------------------
-+// Layout of asm interpreter frame:
-+// [expression stack ] * <- sp
++ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize
++ __ sd(tmp, index); // *index_adr := tmp
++ __ ld(t0, buffer);
++ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr
+
-+// [monitors[0] ] \
-+// ... | monitor block size = k
-+// [monitors[k-1] ] /
-+// [frame initial esp ] ( == &monitors[0], initially here) initial_sp_offset
-+// [byte code index/pointr] = bcx() bcx_offset
++ // Record the previous value
++ __ sd(pre_val, Address(tmp, 0));
++ __ j(done);
+
-+// [pointer to locals ] = locals() locals_offset
-+// [constant pool cache ] = cache() cache_offset
++ __ bind(runtime);
++ // save the live input values
++ RegSet saved = RegSet::of(pre_val);
++ if (tosca_live) saved += RegSet::of(x10);
++ if (obj != noreg) saved += RegSet::of(obj);
+
-+// [klass of method ] = mirror() mirror_offset
-+// [padding ]
++ __ push_reg(saved, sp);
+
-+// [methodData ] = mdp() mdx_offset
-+// [Method ] = method() method_offset
++ // Calling the runtime using the regular call_VM_leaf mechanism generates
++ // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
++ // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL.
++ //
++ // If we are generating the pre-barrier without a frame (e.g. in the
++ // intrinsified Reference.get() routine) then rfp might be pointing to
++ // the caller frame and so this check will most likely fail at runtime.
++ //
++ // Expanding the call directly bypasses the generation of the check.
++ // So when we do not have a full interpreter frame on the stack
++ // expand_call should be passed true.
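As a plain C++ illustration of the fast path emitted above (hypothetical names, not part of the patch: SatbQueue and try_enqueue_previous_value are stand-ins for the thread-local SATB queue state and the enqueue attempt; the slow path corresponds to the ShenandoahRuntime::write_ref_field_pre_entry call):

    #include <cstddef>

    // Hypothetical model of the per-thread SATB queue the barrier consults.
    struct SatbQueue {
      bool   active;  // set only while concurrent marking is running
      size_t index;   // byte offset of the next free slot, counting down to 0
      void** buffer;  // thread-local buffer of recorded previous values
    };

    // Returns false when the buffer is full and the runtime slow path
    // (write_ref_field_pre_entry) has to record pre_val instead.
    inline bool try_enqueue_previous_value(SatbQueue& q, void* pre_val) {
      if (!q.active || pre_val == nullptr) {
        return true;                                // nothing to record
      }
      if (q.index == 0) {
        return false;                               // buffer full: take the slow path
      }
      q.index -= sizeof(void*);                     // tmp := tmp - wordSize
      q.buffer[q.index / sizeof(void*)] = pre_val;  // record the previous value
      return true;
    }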
++ if (expand_call) { ++ assert(pre_val != c_rarg1, "smashed arg"); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } + -+// [last esp ] = last_sp() last_sp_offset -+// [old stack pointer ] (sender_sp) sender_sp_offset ++ __ pop_reg(saved, sp); + -+// [old frame pointer ] -+// [return pc ] ++ __ bind(done); ++} + -+// [last sp ] <- fp = link() -+// [oop temp ] (only for native calls) ++void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) { ++ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); + -+// [padding ] (to preserve machine SP alignment) -+// [locals and parameters ] -+// <- sender sp -+// ------------------------------ Asm interpreter ---------------------------------------- ++ Label is_null; ++ __ beqz(dst, is_null); ++ resolve_forward_pointer_not_null(masm, dst, tmp); ++ __ bind(is_null); ++} + -+// ------------------------------ C Frame ------------------------------------------------ -+// Stack: gcc with -fno-omit-frame-pointer -+// . -+// . -+// +-> . -+// | +-----------------+ | -+// | | return address | | -+// | | previous fp ------+ -+// | | saved registers | -+// | | local variables | -+// | | ... | <-+ -+// | +-----------------+ | -+// | | return address | | -+// +------ previous fp | | -+// | saved registers | | -+// | local variables | | -+// +-> | ... | | -+// | +-----------------+ | -+// | | return address | | -+// | | previous fp ------+ -+// | | saved registers | -+// | | local variables | -+// | | ... | <-+ -+// | +-----------------+ | -+// | | return address | | -+// +------ previous fp | | -+// | saved registers | | -+// | local variables | | -+// $fp --> | ... | | -+// +-----------------+ | -+// | return address | | -+// | previous fp ------+ -+// | saved registers | -+// $sp --> | local variables | -+// +-----------------+ -+// ------------------------------ C Frame ------------------------------------------------ ++// IMPORTANT: This must preserve all registers, even t0 and t1, except those explicitely ++// passed in. ++void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) { ++ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); ++ // The below loads the mark word, checks if the lowest two bits are ++ // set, and if so, clear the lowest two bits and copy the result ++ // to dst. Otherwise it leaves dst alone. ++ // Implementing this is surprisingly awkward. I do it here by: ++ // - Inverting the mark word ++ // - Test lowest two bits == 0 ++ // - If so, set the lowest two bits ++ // - Invert the result back, and copy to dst ++ RegSet saved_regs = RegSet::of(t2); ++ bool borrow_reg = (tmp == noreg); ++ if (borrow_reg) { ++ // No free registers available. Make one useful. 
++ tmp = t0; ++ if (tmp == dst) { ++ tmp = t1; ++ } ++ saved_regs += RegSet::of(tmp); ++ } + -+ public: -+ enum { -+ pc_return_offset = 0, -+ // All frames -+ link_offset = -2, -+ return_addr_offset = -1, -+ sender_sp_offset = 0, -+ // Interpreter frames -+ interpreter_frame_oop_temp_offset = 1, // for native calls only ++ assert_different_registers(tmp, dst, t2); ++ __ push_reg(saved_regs, sp); + -+ interpreter_frame_sender_sp_offset = -3, -+ // outgoing sp before a call to an invoked method -+ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, -+ interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, -+ interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1, -+ interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1, -+ interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1, -+ interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1, -+ interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1, -+ interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1, -+ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, ++ Label done; ++ __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); ++ __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 ++ __ andi(t2, tmp, markOopDesc::lock_mask_in_place); ++ __ bnez(t2, done); ++ __ ori(tmp, tmp, markOopDesc::marked_value); ++ __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 ++ __ bind(done); + -+ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, -+ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ __ pop_reg(saved_regs, sp); ++} + -+ // Entry frames -+ // n.b. these values are determined by the layout defined in -+ // stubGenerator for the Java call stub -+ entry_frame_after_call_words = 22, -+ entry_frame_call_wrapper_offset = -10, ++void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, ++ Register dst, ++ Address load_addr) { ++ assert(ShenandoahLoadRefBarrier, "Should be enabled"); ++ assert(dst != t1 && load_addr.base() != t1, "need t1"); ++ assert_different_registers(load_addr.base(), t0, t1); + -+ // we don't need a save area -+ arg_reg_save_area_bytes = 0 -+ }; ++ Label done; ++ __ enter(); ++ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ __ lbu(t1, gc_state); + -+ intptr_t ptr_at(int offset) const { -+ return *ptr_at_addr(offset); -+ } ++ // Check for heap stability ++ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t1, done); + -+ void ptr_at_put(int offset, intptr_t value) { -+ *ptr_at_addr(offset) = value; ++ // use x11 for load address ++ Register result_dst = dst; ++ if (dst == x11) { ++ __ mv(t1, dst); ++ dst = t1; + } + -+ private: -+ // an additional field beyond _sp and _pc: -+ intptr_t* _fp; // frame pointer -+ // The interpreter and adapters will extend the frame of the caller. -+ // Since oopMaps are based on the sp of the caller before extension -+ // we need to know that value. However in order to compute the address -+ // of the return address we need the real "raw" sp. Since sparc already -+ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's -+ // original sp we use that convention. 
-+ -+ intptr_t* _unextended_sp; -+ void adjust_unextended_sp(); -+ -+ intptr_t* ptr_at_addr(int offset) const { -+ return (intptr_t*) addr_at(offset); -+ } ++ // Save x10 and x11, unless it is an output register ++ RegSet saved_regs = RegSet::of(x10, x11) - result_dst; ++ __ push_reg(saved_regs, sp); ++ __ la(x11, load_addr); ++ __ mv(x10, dst); + -+#ifdef ASSERT -+ // Used in frame::sender_for_{interpreter,compiled}_frame -+ static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp); -+#endif ++ __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); + -+ public: -+ // Constructors ++ __ mv(result_dst, x10); ++ __ pop_reg(saved_regs, sp); + -+ frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); ++ __ bind(done); ++ __ leave(); ++} + -+ frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc); ++void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { ++ if (ShenandoahIUBarrier) { ++ __ push_call_clobbered_registers(); + -+ frame(intptr_t* ptr_sp, intptr_t* ptr_fp); ++ satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); + -+ void init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); ++ __ pop_call_clobbered_registers(); ++ } ++} + -+ // accessors for the instance variables -+ // Note: not necessarily the real 'frame pointer' (see real_fp) -+ intptr_t* fp() const { return _fp; } ++void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) { ++ if (ShenandoahLoadRefBarrier) { ++ Label is_null; ++ __ beqz(dst, is_null); ++ load_reference_barrier_not_null(masm, dst, load_addr); ++ __ bind(is_null); ++ } ++} + -+ inline address* sender_pc_addr() const; ++// ++// Arguments: ++// ++// Inputs: ++// src: oop location to load from, might be clobbered ++// ++// Output: ++// dst: oop loaded from src location ++// ++// Kill: ++// x30 (tmp reg) ++// ++// Alias: ++// dst: x30 (might use x30 as temporary output register to avoid clobbering src) ++// ++void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Register dst, ++ Address src, ++ Register tmp1, ++ Register tmp_thread) { ++ // 1: non-reference load, no additional barrier is needed ++ if (!is_reference_type(type)) { ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ return; ++ } + -+ // expression stack tos if we are nested in a java call -+ intptr_t* interpreter_frame_last_sp() const; ++ // 2: load a reference from src location and apply LRB if needed ++ if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { ++ Register result_dst = dst; + -+ // helper to update a map with callee-saved RBP -+ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ // Preserve src location for LRB ++ RegSet saved_regs; ++ if (dst == src.base()) { ++ dst = (src.base() == x28) ? 
x29 : x28; ++ saved_regs = RegSet::of(dst); ++ __ push_reg(saved_regs, sp); ++ } ++ assert_different_registers(dst, src.base()); + -+ // deoptimization support -+ void interpreter_frame_set_last_sp(intptr_t* last_sp); ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + -+ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ load_reference_barrier(masm, dst, src); + -+ // returns the sending frame, without applying any barriers -+ frame sender_raw(RegisterMap* map) const; ++ if (dst != result_dst) { ++ __ mv(result_dst, dst); ++ dst = result_dst; ++ } + -+#endif // CPU_RISCV_FRAME_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -new file mode 100644 -index 00000000000..5ac1bf57f57 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -@@ -0,0 +1,248 @@ -+/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ if (saved_regs.bits() != 0) { ++ __ pop_reg(saved_regs, sp); ++ } ++ } else { ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ } + -+#ifndef CPU_RISCV_FRAME_RISCV_INLINE_HPP -+#define CPU_RISCV_FRAME_RISCV_INLINE_HPP ++ // 3: apply keep-alive barrier if needed ++ if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { ++ __ enter(); ++ __ push_call_clobbered_registers(); ++ satb_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ pop_call_clobbered_registers(); ++ __ leave(); ++ } ++} + -+#include "code/codeCache.hpp" -+#include "code/vmreg.inline.hpp" ++void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool on_oop = is_reference_type(type); ++ if (!on_oop) { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ return; ++ } + -+// Inline functions for RISCV frames: ++ // flatten object address if needed ++ if (dst.offset() == 0) { ++ if (dst.base() != x13) { ++ __ mv(x13, dst.base()); ++ } ++ } else { ++ __ la(x13, dst); ++ } + -+// Constructors: ++ shenandoah_write_barrier_pre(masm, ++ x13 /* obj */, ++ tmp2 /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); + -+inline frame::frame() { -+ _pc = NULL; -+ _sp = NULL; -+ _unextended_sp = NULL; -+ _fp = NULL; -+ _cb = NULL; -+ _deopt_state = unknown; ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); ++ } else { ++ iu_barrier(masm, val, tmp1); ++ // G1 barrier needs uncompressed oop for region cross check. ++ Register new_val = val; ++ if (UseCompressedOops) { ++ new_val = t1; ++ __ mv(new_val, val); ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); ++ } +} + -+static int spin; ++void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ Label done; ++ // Resolve jobject ++ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); + -+inline void frame::init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { -+ intptr_t a = intptr_t(ptr_sp); -+ intptr_t b = intptr_t(ptr_fp); -+ _sp = ptr_sp; -+ _unextended_sp = ptr_sp; -+ _fp = ptr_fp; -+ _pc = pc; -+ assert(pc != NULL, "no pc?"); -+ _cb = CodeCache::find_blob(pc); -+ adjust_unextended_sp(); ++ // Check for null. 
++ __ beqz(obj, done); + -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ _pc = original_pc; -+ _deopt_state = is_deoptimized; -+ } else { -+ _deopt_state = not_deoptimized; -+ } -+} ++ assert(obj != t1, "need t1"); ++ Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); ++ __ lbu(t1, gc_state); + -+inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { -+ init(ptr_sp, ptr_fp, pc); ++ // Check for heap in evacuation phase ++ __ andi(t0, t1, ShenandoahHeap::EVACUATION); ++ __ bnez(t0, slowpath); ++ ++ __ bind(done); +} + -+inline frame::frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc) { -+ intptr_t a = intptr_t(ptr_sp); -+ intptr_t b = intptr_t(ptr_fp); -+ _sp = ptr_sp; -+ _unextended_sp = unextended_sp; -+ _fp = ptr_fp; -+ _pc = pc; -+ assert(pc != NULL, "no pc?"); -+ _cb = CodeCache::find_blob(pc); -+ adjust_unextended_sp(); ++// Special Shenandoah CAS implementation that handles false negatives due ++// to concurrent evacuation. The service is more complex than a ++// traditional CAS operation because the CAS operation is intended to ++// succeed if the reference at addr exactly matches expected or if the ++// reference at addr holds a pointer to a from-space object that has ++// been relocated to the location named by expected. There are two ++// races that must be addressed: ++// a) A parallel thread may mutate the contents of addr so that it points ++// to a different object. In this case, the CAS operation should fail. ++// b) A parallel thread may heal the contents of addr, replacing a ++// from-space pointer held in addr with the to-space pointer ++// representing the new location of the object. ++// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL ++// or it refers to an object that is not being evacuated out of ++// from-space, or it refers to the to-space version of an object that ++// is being evacuated out of from-space. ++// ++// By default the value held in the result register following execution ++// of the generated code sequence is 0 to indicate failure of CAS, ++// non-zero to indicate success. If is_cae, the result is the value most ++// recently fetched from addr rather than a boolean success indicator. ++// ++// Clobbers t0, t1 ++void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, ++ Register addr, ++ Register expected, ++ Register new_val, ++ Assembler::Aqrl acquire, ++ Assembler::Aqrl release, ++ bool is_cae, ++ Register result) { ++ bool is_narrow = UseCompressedOops; ++ Assembler::operand_size size = is_narrow ? Assembler::uint32 : Assembler::int64; + -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ _pc = original_pc; -+ assert(_cb->as_compiled_method()->insts_contains_inclusive(_pc), -+ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); -+ _deopt_state = is_deoptimized; -+ } else { -+ _deopt_state = not_deoptimized; ++ assert_different_registers(addr, expected, t0, t1); ++ assert_different_registers(addr, new_val, t0, t1); ++ ++ Label retry, success, fail, done; ++ ++ __ bind(retry); ++ ++ // Step1: Try to CAS. ++ __ cmpxchg(addr, expected, new_val, size, acquire, release, /* result */ t1); ++ ++ // If success, then we are done. ++ __ beq(expected, t1, success); ++ ++ // Step2: CAS failed, check the forwared pointer. 
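The retry protocol documented above can be sketched in ordinary C++ before the emitted sequence continues below. This is only an illustration: std::atomic stands in for the LR/SC-based cmpxchg, and resolve_forward is a hypothetical stand-in for decoding a forwarding pointer.

    #include <atomic>

    // Assumed helper: returns the to-space copy if obj carries a forwarding
    // pointer, otherwise returns obj itself.
    void* resolve_forward(void* obj);

    // Returns 'expected' on success, or the conflicting value seen at addr on
    // failure (roughly the is_cae result); callers wanting a boolean result
    // compare the return value against 'expected'.
    void* cas_oop_with_forwarding(std::atomic<void*>& addr, void* expected, void* new_val) {
      for (;;) {
        void* witness = expected;
        if (addr.compare_exchange_strong(witness, new_val)) {
          return expected;                      // Step 1: plain CAS succeeded
        }
        // Step 2: CAS failed; the location may hold a stale from-space pointer
        // to the very object named by 'expected'.
        if (resolve_forward(witness) != expected) {
          return witness;                       // a genuinely different object: fail
        }
        // Step 3: CAS again, expecting the stale value we just observed.
        void* stale = witness;
        if (addr.compare_exchange_strong(stale, new_val)) {
          return expected;                      // success despite the healing race
        }
        // The location changed again underneath us; start over from Step 1.
      }
    }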
++ __ mv(t0, t1); ++ ++ if (is_narrow) { ++ __ decode_heap_oop(t0, t0); + } -+} ++ resolve_forward_pointer(masm, t0); + -+inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp) { -+ intptr_t a = intptr_t(ptr_sp); -+ intptr_t b = intptr_t(ptr_fp); -+ _sp = ptr_sp; -+ _unextended_sp = ptr_sp; -+ _fp = ptr_fp; -+ _pc = (address)(ptr_sp[-1]); ++ __ encode_heap_oop(t0, t0); + -+ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace -+ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly -+ // unlucky the junk value could be to a zombied method and we'll die on the -+ // find_blob call. This is also why we can have no asserts on the validity -+ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler -+ // -> pd_last_frame should use a specialized version of pd_last_frame which could -+ // call a specilaized frame constructor instead of this one. -+ // Then we could use the assert below. However this assert is of somewhat dubious -+ // value. ++ // Report failure when the forwarded oop was not expected. ++ __ bne(t0, expected, fail); + -+ _cb = CodeCache::find_blob(_pc); -+ adjust_unextended_sp(); ++ // Step 3: CAS again using the forwarded oop. ++ __ cmpxchg(addr, t1, new_val, size, acquire, release, /* result */ t0); + -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ _pc = original_pc; -+ _deopt_state = is_deoptimized; ++ // Retry when failed. ++ __ bne(t0, t1, retry); ++ ++ __ bind(success); ++ if (is_cae) { ++ __ mv(result, expected); + } else { -+ _deopt_state = not_deoptimized; ++ __ addi(result, zr, 1); + } -+} ++ __ j(done); + -+// Accessors ++ __ bind(fail); ++ if (is_cae) { ++ __ mv(result, t0); ++ } else { ++ __ mv(result, zr); ++ } + -+inline bool frame::equal(frame other) const { -+ bool ret = sp() == other.sp() && -+ unextended_sp() == other.unextended_sp() && -+ fp() == other.fp() && -+ pc() == other.pc(); -+ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); -+ return ret; ++ __ bind(done); +} + -+// Return unique id for this frame. The id must have a value where we can distinguish -+// identity and younger/older relationship. NULL represents an invalid (incomparable) -+// frame. -+inline intptr_t* frame::id(void) const { return unextended_sp(); } ++#undef __ + -+// Return true if the frame is older (less recent activation) than the frame represented by id -+inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); -+ return this->id() > id ; } ++#ifdef COMPILER1 + -+inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } ++#define __ ce->masm()-> + -+inline intptr_t* frame::link_or_null() const { -+ intptr_t** ptr = (intptr_t **)addr_at(link_offset); -+ return os::is_readable_pointer(ptr) ? *ptr : NULL; -+} ++void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { ++ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. 
++ __ bind(*stub->entry()); + -+inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ assert(stub->pre_val()->is_register(), "Precondition."); + -+// Return address -+inline address* frame::sender_pc_addr() const { return (address*) addr_at(return_addr_offset); } -+inline address frame::sender_pc() const { return *sender_pc_addr(); } -+inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); } ++ Register pre_val_reg = stub->pre_val()->as_register(); + -+inline intptr_t** frame::interpreter_frame_locals_addr() const { -+ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); ++ } ++ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); ++ __ j(*stub->continuation()); +} + -+inline intptr_t* frame::interpreter_frame_last_sp() const { -+ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); -+} ++void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ++ ShenandoahLoadReferenceBarrierStub* stub) { ++ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ __ bind(*stub->entry()); + -+inline intptr_t* frame::interpreter_frame_bcp_addr() const { -+ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); -+} ++ Register obj = stub->obj()->as_register(); ++ Register res = stub->result()->as_register(); ++ Register addr = stub->addr()->as_pointer_register(); ++ Register tmp1 = stub->tmp1()->as_register(); ++ Register tmp2 = stub->tmp2()->as_register(); + -+inline intptr_t* frame::interpreter_frame_mdp_addr() const { -+ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); -+} ++ assert(res == x10, "result must arrive in x10"); ++ assert_different_registers(tmp1, tmp2, t0); + ++ if (res != obj) { ++ __ mv(res, obj); ++ } + -+// Constant pool cache ++ // Check for null. ++ __ beqz(res, *stub->continuation(), /* is_far */ true); + -+inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { -+ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); -+} ++ // Check for object in cset. ++ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(t0, tmp2, tmp1); ++ __ lb(tmp2, Address(t0)); ++ __ beqz(tmp2, *stub->continuation(), /* is_far */ true); + -+// Method ++ // Check if object is already forwarded. ++ Label slow_path; ++ __ ld(tmp1, Address(res, oopDesc::mark_offset_in_bytes())); ++ __ xori(tmp1, tmp1, -1); ++ __ andi(t0, tmp1, markOopDesc::lock_mask_in_place); ++ __ bnez(t0, slow_path); + -+inline Method** frame::interpreter_frame_method_addr() const { -+ return (Method**)addr_at(interpreter_frame_method_offset); -+} ++ // Decode forwarded object. 
++ __ ori(tmp1, tmp1, markOopDesc::marked_value); ++ __ xori(res, tmp1, -1); ++ __ j(*stub->continuation()); + -+// Mirror ++ __ bind(slow_path); ++ ce->store_parameter(res, 0); ++ ce->store_parameter(addr, 1); ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin())); + -+inline oop* frame::interpreter_frame_mirror_addr() const { -+ return (oop*)addr_at(interpreter_frame_mirror_offset); ++ __ j(*stub->continuation()); +} + -+// top of expression stack -+inline intptr_t* frame::interpreter_frame_tos_address() const { -+ intptr_t* last_sp = interpreter_frame_last_sp(); -+ if (last_sp == NULL) { -+ return sp(); -+ } else { -+ // sp() may have been extended or shrunk by an adapter. At least -+ // check that we don't fall behind the legal region. -+ // For top deoptimized frame last_sp == interpreter_frame_monitor_end. -+ assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); -+ return last_sp; -+ } -+} ++#undef __ + -+inline oop* frame::interpreter_frame_temp_oop_addr() const { -+ return (oop *)(fp() + interpreter_frame_oop_temp_offset); -+} ++#define __ sasm-> + -+inline int frame::interpreter_frame_monitor_size() { -+ return BasicObjectLock::size(); -+} ++void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("shenandoah_pre_barrier", false); + ++ // arg0 : previous value of memory + -+// expression stack -+// (the max_stack arguments are used by the GC; see class FrameClosure) ++ BarrierSet* bs = BarrierSet::barrier_set(); + -+inline intptr_t* frame::interpreter_frame_expression_stack() const { -+ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); -+ return monitor_end-1; -+} ++ const Register pre_val = x10; ++ const Register thread = xthread; ++ const Register tmp = t0; + ++ Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + -+// Entry frames ++ Label done; ++ Label runtime; + -+inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { -+ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++ // Is marking still active? ++ Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ __ lb(tmp, gc_state); ++ __ andi(tmp, tmp, ShenandoahHeap::MARKING); ++ __ beqz(tmp, done); ++ ++ // Can we store original value in the thread's buffer? 
++ __ ld(tmp, queue_index); ++ __ beqz(tmp, runtime); ++ ++ __ sub(tmp, tmp, wordSize); ++ __ sd(tmp, queue_index); ++ __ ld(t1, buffer); ++ __ add(tmp, tmp, t1); ++ __ load_parameter(0, t1); ++ __ sd(t1, Address(tmp, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ ++ __ epilogue(); +} + ++void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("shenandoah_load_reference_barrier", false); ++ // arg0 : object to be resolved + -+// Compiled frames -+PRAGMA_DIAG_PUSH -+PRAGMA_NONNULL_IGNORED -+inline oop frame::saved_oop_result(RegisterMap* map) const { -+ oop* result_adr = (oop *)map->location(x10->as_VMReg()); -+ guarantee(result_adr != NULL, "bad register save location"); -+ return (*result_adr); ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, x10); ++ __ load_parameter(1, x11); ++ ++ if (UseCompressedOops) { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); ++ } else { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); ++ } ++ __ jalr(ra); ++ __ mv(t0, x10); ++ __ pop_call_clobbered_registers(); ++ __ mv(x10, t0); ++ ++ __ epilogue(); +} + -+inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { -+ oop* result_adr = (oop *)map->location(x10->as_VMReg()); -+ guarantee(result_adr != NULL, "bad register save location"); -+ *result_adr = obj; ++#undef __ ++ ++#endif // COMPILER1 ++ ++address ShenandoahBarrierSetAssembler::shenandoah_lrb() { ++ assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); ++ return _shenandoah_lrb; +} -+PRAGMA_DIAG_POP + -+#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -new file mode 100644 -index 00000000000..1c46b3947d3 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -0,0 +1,484 @@ -+/* -+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++#define __ cgen->assembler()-> ++ ++// Shenandoah load reference barrier. ++// ++// Input: ++// x10: OOP to evacuate. Not null. ++// x11: load address ++// ++// Output: ++// x10: Pointer to evacuated OOP. ++// ++// Trash t0 t1 Preserve everything else. 
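Before the generated stub itself, an illustrative C++ view of the fast-path filter it implements; cset_bitmap and region_size_bytes_shift are hypothetical stand-ins for ShenandoahHeap::in_cset_fast_test_addr() and ShenandoahHeapRegion::region_size_bytes_shift_jint().

    #include <cstdint>

    extern const uint8_t* cset_bitmap;          // one byte per heap region
    extern unsigned region_size_bytes_shift;    // log2 of the region size in bytes

    // Returns true when the oop may point into the collection set and the
    // ShenandoahRuntime::load_reference_barrier* slow path has to run.
    inline bool lrb_needs_slow_path(const void* oop) {
      uintptr_t region_index = reinterpret_cast<uintptr_t>(oop) >> region_size_bytes_shift;
      return (cset_bitmap[region_index] & 1) != 0;  // lbu + andi 1 + bnez slow_path
    }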
++address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { ++ __ align(6); ++ StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); ++ address start = __ pc(); ++ ++ Label slow_path; ++ __ mv(t1, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(t1, t1, t0); ++ __ lbu(t1, Address(t1, 0)); ++ __ andi(t0, t1, 1); ++ __ bnez(t0, slow_path); ++ __ ret(); ++ ++ __ bind(slow_path); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ push_call_clobbered_registers(); ++ ++ if (UseCompressedOops) { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); ++ } else { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); ++ } ++ __ jalr(ra); ++ __ mv(t0, x10); ++ __ pop_call_clobbered_registers(); ++ __ mv(x10, t0); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(); ++ ++ return start; ++} ++ ++#undef __ ++ ++void ShenandoahBarrierSetAssembler::barrier_stubs_init() { ++ if (ShenandoahLoadRefBarrier) { ++ int stub_code_size = 2048; ++ ResourceMark rm; ++ BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); ++ CodeBuffer buf(bb); ++ StubCodeGenerator cgen(&buf); ++ _shenandoah_lrb = generate_shenandoah_lrb(&cgen); ++ } ++} +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp +new file mode 100644 +index 0000000000..5d75035e9d +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,97 @@ ++/* ++ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -15412,474 +15178,545 @@ index 00000000000..1c46b3947d3 + * + */ + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/g1/g1BarrierSet.hpp" -+#include "gc/g1/g1BarrierSetAssembler.hpp" -+#include "gc/g1/g1BarrierSetRuntime.hpp" -+#include "gc/g1/g1CardTable.hpp" -+#include "gc/g1/g1ThreadLocalData.hpp" -+#include "gc/g1/heapRegion.hpp" -+#include "gc/shared/collectedHeap.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/thread.hpp" ++#ifndef CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#ifdef COMPILER1 -+#include "c1/c1_LIRAssembler.hpp" -+#include "c1/c1_MacroAssembler.hpp" -+#include "gc/g1/c1/g1BarrierSetC1.hpp" ++class LIR_Assembler; ++class ShenandoahPreBarrierStub; ++class ShenandoahLoadReferenceBarrierStub; ++class StubAssembler; +#endif ++class StubCodeGenerator; + -+#define __ masm-> ++class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { ++private: + -+void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register addr, Register count, RegSet saved_regs) { -+ assert_cond(masm != NULL); -+ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; -+ if (!dest_uninitialized) { -+ Label done; -+ Address in_progress(xthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ static address _shenandoah_lrb; + -+ // Is marking active? -+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { -+ __ lwu(t0, in_progress); -+ } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(t0, in_progress); -+ } -+ __ beqz(t0, done); ++ void satb_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ void shenandoah_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); + -+ __ push_reg(saved_regs, sp); -+ if (count == c_rarg0) { -+ if (addr == c_rarg1) { -+ // exactly backwards!! 
-+ __ mv(t0, c_rarg0); -+ __ mv(c_rarg0, c_rarg1); -+ __ mv(c_rarg1, t0); -+ } else { -+ __ mv(c_rarg1, count); -+ __ mv(c_rarg0, addr); -+ } -+ } else { -+ __ mv(c_rarg0, addr); -+ __ mv(c_rarg1, count); -+ } -+ if (UseCompressedOops) { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); -+ } -+ __ pop_reg(saved_regs, sp); ++ void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); ++ void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); ++ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr); ++ void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr); + -+ __ bind(done); -+ } -+} ++ address generate_shenandoah_lrb(StubCodeGenerator* cgen); + -+void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs) { -+ assert_cond(masm != NULL); -+ __ push_reg(saved_regs, sp); -+ assert_different_registers(start, count, tmp); -+ assert_different_registers(c_rarg0, count); -+ __ mv(c_rarg0, start); -+ __ mv(c_rarg1, count); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); -+ __ pop_reg(saved_regs, sp); -+} ++public: + -+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call) { -+ // If expand_call is true then we expand the call_VM_leaf macro -+ // directly to skip generating the check by -+ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ static address shenandoah_lrb(); + -+ assert_cond(masm != NULL); -+ assert(thread == xthread, "must be"); ++ void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); + -+ Label done; -+ Label runtime; ++#ifdef COMPILER1 ++ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); ++ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); ++#endif + -+ assert_different_registers(obj, pre_val, tmp, t0); -+ assert(pre_val != noreg && tmp != noreg, "expecting a register"); ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs); + -+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); -+ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); + -+ // Is marking active? 
-+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width -+ __ lwu(tmp, in_progress); -+ } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(tmp, in_progress); -+ } -+ __ beqz(tmp, done); ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); + -+ // Do we need to load the previous value? -+ if (obj != noreg) { -+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); -+ } ++ virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); + -+ // Is the previous value null? -+ __ beqz(pre_val, done); ++ virtual void barrier_stubs_init(); ++}; + -+ // Can we store original value in the thread's buffer? -+ // Is index == 0? -+ // (The index field is typed as size_t.) ++#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +new file mode 100644 +index 0000000000..bab407a8b7 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +@@ -0,0 +1,197 @@ ++// ++// Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// + -+ __ ld(tmp, index); // tmp := *index_adr -+ __ beqz(tmp, runtime); // tmp == 0? 
-+ // If yes, goto runtime ++source_hpp %{ ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++%} + -+ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize -+ __ sd(tmp, index); // *index_adr := tmp -+ __ ld(t0, buffer); -+ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr ++instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ // Record the previous value -+ __ sd(pre_val, Address(tmp, 0)); -+ __ j(done); ++ effect(TEMP tmp, KILL cr); + -+ __ bind(runtime); -+ // save the live input values -+ RegSet saved = RegSet::of(pre_val); -+ if (tosca_live) { saved += RegSet::of(x10); } -+ if (obj != noreg) { saved += RegSet::of(obj); } ++ format %{ ++ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapP_shenandoah" ++ %} + -+ __ push_reg(saved, sp); ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ if (expand_call) { -+ assert(pre_val != c_rarg1, "smashed arg"); -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); -+ } ++ ins_pipe(pipe_slow); ++%} + -+ __ pop_reg(saved, sp); ++instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ __ bind(done); ++ effect(TEMP tmp, KILL cr); + -+} ++ format %{ ++ "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapN_shenandoah" ++ %} + -+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, -+ Register store_addr, -+ Register new_val, -+ Register thread, -+ Register tmp, -+ Register tmp2) { -+ assert_cond(masm != NULL); -+ assert(thread == xthread, "must be"); -+ assert_different_registers(store_addr, new_val, thread, tmp, tmp2, -+ t0); -+ assert(store_addr != noreg && new_val != noreg && tmp != noreg && -+ tmp2 != noreg, "expecting a register"); ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ins_pipe(pipe_slow); ++%} + -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); ++instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ Label done; -+ Label runtime; ++ effect(TEMP tmp, KILL cr); + -+ // Does store cross heap regions? ++ format %{ ++ "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapPAcq_shenandoah" ++ %} + -+ __ xorr(tmp, store_addr, new_val); -+ __ srli(tmp, tmp, HeapRegion::LogOfHRGrainBytes); -+ __ beqz(tmp, done); ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ // crosses regions, storing NULL? ++ ins_pipe(pipe_slow); ++%} + -+ __ beqz(new_val, done); ++instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ // storing region crossing non-NULL, is card already dirty? ++ effect(TEMP tmp, KILL cr); + -+ ExternalAddress cardtable((address) ct->byte_map_base()); -+ const Register card_addr = tmp; ++ format %{ ++ "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapNAcq_shenandoah" ++ %} + -+ __ srli(card_addr, store_addr, CardTable::card_shift()); ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ // get the address of the card -+ __ load_byte_map_base(tmp2); -+ __ add(card_addr, card_addr, tmp2); -+ __ lbu(tmp2, Address(card_addr)); -+ __ mv(t0, (int)G1CardTable::g1_young_card_val()); -+ __ beq(tmp2, t0, done); ++ ins_pipe(pipe_slow); ++%} + -+ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); + -+ __ membar(MacroAssembler::StoreLoad); ++ format %{ ++ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah" ++ %} + -+ __ lbu(tmp2, Address(card_addr)); -+ __ beqz(tmp2, done); ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ true /* is_cae */, $res$$Register); ++ %} + -+ // storing a region crossing, non-NULL oop, card is clean. -+ // dirty card and log. ++ ins_pipe(pipe_slow); ++%} + -+ __ sb(zr, Address(card_addr)); ++instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+ __ ld(t0, queue_index); -+ __ beqz(t0, runtime); -+ __ sub(t0, t0, wordSize); -+ __ sd(t0, queue_index); ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah" ++ %} + -+ __ ld(tmp2, buffer); -+ __ add(t0, tmp2, t0); -+ __ sd(card_addr, Address(t0, 0)); -+ __ j(done); ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ true /* is_cae */, $res$$Register); ++ %} + -+ __ bind(runtime); -+ // save the live input values -+ RegSet saved = RegSet::of(store_addr); -+ __ push_reg(saved, sp); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); -+ __ pop_reg(saved, sp); ++ ins_pipe(pipe_slow); ++%} + -+ __ bind(done); -+} ++instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread) { -+ assert_cond(masm != NULL); -+ bool on_oop = is_reference_type(type); -+ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; -+ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; -+ bool on_reference = on_weak || on_phantom; -+ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -+ if (on_oop && on_reference) { -+ // RA is live. It must be saved around calls. -+ __ enter(); // barrier may call runtime -+ // Generate the G1 pre-barrier code to log the value of -+ // the referent field in an SATB buffer. -+ g1_write_barrier_pre(masm /* masm */, -+ noreg /* obj */, -+ dst /* pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ true /* tosca_live */, -+ true /* expand_call */); -+ __ leave(); -+ } -+} ++ effect(TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah" ++ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" ++ %} + -+void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2) { -+ assert_cond(masm != NULL); -+ // flatten object address if needed -+ if (dst.offset() == 0) { -+ if (dst.base() != x13) { -+ __ mv(x13, dst.base()); -+ } -+ } else { -+ __ la(x13, dst); -+ } ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ g1_write_barrier_pre(masm, -+ x13 /* obj */, -+ tmp2 /* pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ val != noreg /* tosca_live */, -+ false /* expand_call */); ++ ins_pipe(pipe_slow); ++%} + -+ if (val == noreg) { -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); -+ } else { -+ // G1 barrier needs uncompressed oop for region cross check. 
-+ Register new_val = val; -+ if (UseCompressedOops) { -+ new_val = t1; -+ __ mv(new_val, val); -+ } -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); -+ g1_write_barrier_post(masm, -+ x13 /* store_adr */, -+ new_val /* new_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ tmp2 /* tmp2 */); -+ } -+} ++instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); + -+#ifdef COMPILER1 -+ -+#undef __ -+#define __ ce->masm()-> ++ effect(TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah" ++ %} + -+void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { -+ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} + -+ // At this point we know that marking is in progress. -+ // If do_load() is true then we have to emit the -+ // load of the previous value; otherwise it has already -+ // been loaded into _pre_val. -+ __ bind(*stub->entry()); ++ ins_pipe(pipe_slow); ++%} +diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +new file mode 100644 +index 0000000000..d6ce8da07b +--- /dev/null ++++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +@@ -0,0 +1,46 @@ ++/* ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ assert(stub->pre_val()->is_register(), "Precondition."); ++#ifndef CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP ++#define CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP + -+ Register pre_val_reg = stub->pre_val()->as_register(); ++const int StackAlignmentInBytes = 16; + -+ if (stub->do_load()) { -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); -+ } -+ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); -+ ce->store_parameter(stub->pre_val()->as_register(), 0); -+ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); -+ __ j(*stub->continuation()); -+} ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are extended to 64 bits. ++const bool CCallingConventionRequiresIntsAsLongs = false; + -+void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { -+ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); -+ __ bind(*stub->entry()); -+ assert(stub->addr()->is_register(), "Precondition"); -+ assert(stub->new_val()->is_register(), "Precondition"); -+ Register new_val_reg = stub->new_val()->as_register(); -+ __ beqz(new_val_reg, *stub->continuation(), /* is_far */ true); -+ ce->store_parameter(stub->addr()->as_pointer_register(), 0); -+ __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin())); -+ __ j(*stub->continuation()); -+} ++// To be safe, we deoptimize when we come across an access that needs ++// patching. This is similar to what is done on aarch64. ++#define DEOPTIMIZE_WHEN_PATCHING + -+#undef __ ++#define SUPPORTS_NATIVE_CX8 + -+#define __ sasm-> ++#define SUPPORT_RESERVED_STACK_AREA + -+void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { -+ __ prologue("g1_pre_barrier", false); ++#define THREAD_LOCAL_POLL + -+ BarrierSet* bs = BarrierSet::barrier_set(); ++#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +new file mode 100644 +index 0000000000..90db2f4460 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -0,0 +1,111 @@ ++/* ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
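// --------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): StackAlignmentInBytes = 16
// above reflects the RISC-V psABI rule that sp stays 16-byte aligned at call
// boundaries; the interpreter re-establishes it with andi(sp, esp, -16) later
// in this patch. A minimal sketch of that align-down computation:
#include <cassert>
#include <cstdint>

constexpr std::uintptr_t kStackAlignment = 16;

// Round an address down to the next 16-byte boundary, as andi(sp, esp, -16) does.
constexpr std::uintptr_t align_down(std::uintptr_t sp) {
  return sp & ~(kStackAlignment - 1);
}

int main() {
  assert(align_down(0x7ffffff8) == 0x7ffffff0);
  assert(align_down(0x7ffffff0) == 0x7ffffff0);  // already aligned: unchanged
  return 0;
}
// --------------------------------------------------------------------------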
++ * ++ */ + -+ // arg0 : previous value of memory -+ const Register pre_val = x10; -+ const Register thread = xthread; -+ const Register tmp = t0; ++#ifndef CPU_RISCV_GLOBALS_RISCV_HPP ++#define CPU_RISCV_GLOBALS_RISCV_HPP + -+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); -+ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" + -+ Label done; -+ Label runtime; ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) + -+ // Is marking still active? -+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width -+ __ lwu(tmp, in_progress); -+ } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(tmp, in_progress); -+ } -+ __ beqz(tmp, done); ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this + -+ // Can we store original value in the thread's buffer? -+ __ ld(tmp, queue_index); -+ __ beqz(tmp, runtime); ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast + -+ __ sub(tmp, tmp, wordSize); -+ __ sd(tmp, queue_index); -+ __ ld(t1, buffer); -+ __ add(tmp, tmp, t1); -+ __ load_parameter(0, t1); -+ __ sd(t1, Address(tmp, 0)); -+ __ j(done); ++define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. ++define_pd_global(intx, CodeEntryAlignment, 64); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); + -+ __ bind(runtime); -+ __ push_call_clobbered_registers(); -+ __ load_parameter(0, pre_val); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); -+ __ pop_call_clobbered_registers(); -+ __ bind(done); ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the ++// stack if compiled for unix and LP64. To pass stack overflow tests we need ++// 20 shadow pages. 
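// --------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the comment above motivates the
// DEFAULT_STACK_SHADOW_PAGES value defined next. Assuming the usual 4 KiB base
// page size (the real value comes from the OS at runtime), 20 shadow pages
// give 80 KiB of headroom, which covers the 64 KiB native buffer mentioned in
// the comment:
#include <cstddef>

constexpr std::size_t kPageSize     = 4 * 1024;   // assumed base page size
constexpr std::size_t kShadowPages  = 20;         // product-build default below
constexpr std::size_t kSocketBuffer = 64 * 1024;  // buffer used by socketWrite0

static_assert(kShadowPages * kPageSize > kSocketBuffer,
              "shadow zone must cover the 64 KiB native buffer");

int main() { return 0; }
// --------------------------------------------------------------------------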
++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+5)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) + -+ __ epilogue(); -+} ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) + -+void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { -+ __ prologue("g1_post_barrier", false); ++define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); ++define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); + -+ // arg0 : store_address -+ Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); + -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); ++define_pd_global(bool, UseMembar, true); + -+ Label done; -+ Label runtime; ++define_pd_global(bool, PreserveFramePointer, false); + -+ // At this point we know new_value is non-NULL and the new_value crosses regions. -+ // Must check to see if card is already dirty -+ const Register thread = xthread; ++// GC Ergo Flags ++define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread + -+ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++define_pd_global(uintx, TypeProfileLevel, 111); + -+ const Register card_offset = t1; -+ // RA is free here, so we can use it to hold the byte_map_base. -+ const Register byte_map_base = ra; ++define_pd_global(bool, CompactStrings, true); + -+ assert_different_registers(card_offset, byte_map_base, t0); ++// Clear short arrays bigger than one word in an arch-specific way ++define_pd_global(intx, InitArrayShortSize, BytesPerLong); + -+ __ load_parameter(0, card_offset); -+ __ srli(card_offset, card_offset, CardTable::card_shift()); -+ __ load_byte_map_base(byte_map_base); ++define_pd_global(bool, ThreadLocalHandshakes, true); + -+ // Convert card offset into an address in card_addr -+ Register card_addr = card_offset; -+ __ add(card_addr, byte_map_base, card_addr); ++define_pd_global(intx, InlineSmallCode, 1000); + -+ __ lbu(t0, Address(card_addr, 0)); -+ __ sub(t0, t0, (int)G1CardTable::g1_young_card_val()); -+ __ beqz(t0, done); ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ diagnostic, \ ++ experimental, \ ++ notproduct, \ ++ range, \ ++ constraint, \ ++ writeable) \ ++ \ ++ product(bool, NearCpool, true, \ ++ "constant pool is close to instructions") \ ++ product(intx, BlockZeroingLowLimit, 256, \ ++ "Minimum size in bytes when block zeroing will be used") \ ++ range(1, max_jint) \ ++ product(bool, TraceTraps, false, "Trace all traps the signal handler") \ ++ /* For now we're going to be safe and add the I/O bits to userspace fences. 
*/ \ ++ product(bool, UseConservativeFence, true, \ ++ "Extend i for r and o for w in the pred/succ flags of fence") \ ++ product(bool, AvoidUnalignedAccesses, true, \ ++ "Avoid generating unaligned memory accesses") \ ++ experimental(bool, UseRVV, false, "Use RVV instructions") \ ++ experimental(bool, UseZba, false, "Use Zba instructions") \ ++ experimental(bool, UseZbb, false, "Use Zbb instructions") \ ++ experimental(bool, UseZbs, false, "Use Zbs instructions") \ ++ experimental(bool, UseRVC, false, "Use RVC instructions") + -+ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++#endif // CPU_RISCV_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp +new file mode 100644 +index 0000000000..cc93103dc5 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp +@@ -0,0 +1,79 @@ ++/* ++ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ __ membar(MacroAssembler::StoreLoad); -+ __ lbu(t0, Address(card_addr, 0)); -+ __ beqz(t0, done); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/oop.inline.hpp" + -+ // storing region crossing non-NULL, card is clean. -+ // dirty card and log. -+ __ sb(zr, Address(card_addr, 0)); ++int InlineCacheBuffer::ic_stub_code_size() { ++ // 6: auipc + ld + auipc + jalr + address(2 * instruction_size) ++ // 5: auipc + ld + j + address(2 * instruction_size) ++ return (MacroAssembler::far_branches() ? 
6 : 5) * NativeInstruction::instruction_size; ++} + -+ __ ld(t0, queue_index); -+ __ beqz(t0, runtime); -+ __ sub(t0, t0, wordSize); -+ __ sd(t0, queue_index); ++#define __ masm-> + -+ // Reuse RA to hold buffer_addr -+ const Register buffer_addr = ra; ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { ++ assert_cond(code_begin != NULL && entry_point != NULL); ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // Note: even though the code contains an embedded value, we do not need reloc info ++ // because ++ // (1) the value is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear + -+ __ ld(buffer_addr, buffer); -+ __ add(t0, buffer_addr, t0); -+ __ sd(card_addr, Address(t0, 0)); -+ __ j(done); ++ address start = __ pc(); ++ Label l; ++ __ ld(t1, l); ++ __ far_jump(ExternalAddress(entry_point)); ++ __ align(wordSize); ++ __ bind(l); ++ __ emit_int64((intptr_t)cached_value); ++ // Only need to invalidate the 1st two instructions - not the whole ic stub ++ ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size()); ++ assert(__ pc() - start == ic_stub_code_size(), "must be"); ++} + -+ __ bind(runtime); -+ __ push_call_clobbered_registers(); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); -+ __ pop_call_clobbered_registers(); -+ __ bind(done); -+ __ epilogue(); ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeJump* jump = nativeJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); +} + -+#undef __ + -+#endif // COMPILER1 -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // The word containing the cached value is at the end of this IC buffer ++ uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize); ++ void* o = (void*)*p; ++ return o; ++} +diff --git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp new file mode 100644 -index 00000000000..37bc183f39c +index 0000000000..d615dcfb9e --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp -@@ -0,0 +1,78 @@ ++++ b/src/hotspot/cpu/riscv/icache_riscv.cpp +@@ -0,0 +1,68 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2023, Rivos Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
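// --------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the IC stub assembled above is
// a handful of instructions followed by the 64-bit cached value embedded at
// the very end of the stub, which is why ic_buffer_cached_value() simply reads
// the last word of the buffer. The buffer size and types below are assumptions
// for the example; only the tail-of-buffer convention is the point.
#include <cassert>
#include <cstdint>
#include <cstring>

constexpr std::size_t kWordSize = sizeof(std::uint64_t);

// Embed a value in the last word of a stub-sized buffer, as the stub does.
void write_tail_value(unsigned char* code_begin, std::size_t stub_size, std::uint64_t value) {
  std::memcpy(code_begin + stub_size - kWordSize, &value, kWordSize);
}

// Recover it the same way ic_buffer_cached_value() does: end minus one word.
std::uint64_t read_tail_value(const unsigned char* code_begin, std::size_t stub_size) {
  std::uint64_t value;
  std::memcpy(&value, code_begin + stub_size - kWordSize, kWordSize);
  return value;
}

int main() {
  unsigned char stub[24] = {};  // instruction bytes + trailing 8-byte value slot
  write_tail_value(stub, sizeof(stub), 0xdeadbeefULL);
  assert(read_tail_value(stub, sizeof(stub)) == 0xdeadbeefULL);
  return 0;
}
// --------------------------------------------------------------------------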
+ * + * This code is free software; you can redistribute it and/or modify it @@ -15902,68 +15739,57 @@ index 00000000000..37bc183f39c + * + */ + -+#ifndef CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP -+ ++#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" -+#include "gc/shared/modRefBarrierSetAssembler.hpp" -+#include "utilities/macros.hpp" ++#include "riscv_flush_icache.hpp" ++#include "runtime/java.hpp" ++#include "runtime/icache.hpp" + -+#ifdef COMPILER1 -+class LIR_Assembler; -+#endif -+class StubAssembler; -+class G1PreBarrierStub; -+class G1PostBarrierStub; ++#define __ _masm-> + -+class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { -+protected: -+ void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register addr, Register count, RegSet saved_regs); -+ void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs); ++static int icache_flush(address addr, int lines, int magic) { ++ // To make a store to instruction memory visible to all RISC-V harts, ++ // the writing hart has to execute a data FENCE before requesting that ++ // all remote RISC-V harts execute a FENCE.I. + -+ void g1_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call); ++ // We need to make sure stores happens before the I/D cache synchronization. ++ __asm__ volatile("fence rw, rw" : : : "memory"); + -+ void g1_write_barrier_post(MacroAssembler* masm, -+ Register store_addr, -+ Register new_val, -+ Register thread, -+ Register tmp, -+ Register tmp2); ++ RiscvFlushIcache::flush((uintptr_t)addr, ((uintptr_t)lines) << ICache::log2_line_size); + -+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2); ++ return magic; ++} + -+public: -+#ifdef COMPILER1 -+ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); -+ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { ++ // Only riscv_flush_icache is supported as I-cache synchronization. ++ // We must make sure the VM can execute such without error. ++ if (!RiscvFlushIcache::test()) { ++ vm_exit_during_initialization("Unable to synchronize I-cache"); ++ } + -+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); -+ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); -+#endif ++ address start = (address)icache_flush; ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; + -+ void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread); -+}; ++ // ICache::invalidate_range() contains explicit condition that the first ++ // call is invoked on the generated icache flush stub code range. 
++ ICache::invalidate_range(start, 0); + -+#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp ++ { ++ StubCodeMark mark(this, "ICache", "fake_stub_for_inlined_icache_flush"); ++ __ ret(); ++ } ++} ++ ++#undef __ +diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp new file mode 100644 -index 00000000000..8735fd014ff +index 0000000000..5bf40ca820 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp -@@ -0,0 +1,31 @@ ++++ b/src/hotspot/cpu/riscv/icache_riscv.hpp +@@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -15986,20 +15812,32 @@ index 00000000000..8735fd014ff + * + */ + -+#ifndef CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP -+#define CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP ++#ifndef CPU_RISCV_ICACHE_RISCV_HPP ++#define CPU_RISCV_ICACHE_RISCV_HPP + -+const size_t G1MergeHeapRootsPrefetchCacheSize = 16; ++// Interface for updating the instruction cache. Whenever the VM ++// modifies code, part of the processor instruction cache potentially ++// has to be flushed. + -+#endif // CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp ++class ICache : public AbstractICache { ++public: ++ enum { ++ stub_size = 16, // Size of the icache flush stub in bytes ++ line_size = BytesPerWord, // conservative ++ log2_line_size = LogBytesPerWord // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_RISCV_ICACHE_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp new file mode 100644 -index 00000000000..3c115a2ea02 +index 0000000000..b50be7e726 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp -@@ -0,0 +1,302 @@ ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -0,0 +1,1931 @@ +/* -+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
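// --------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the stub above encodes the
// RISC-V rule for freshly written code, namely order the stores with a data
// FENCE and then have every hart that may execute the code perform a FENCE.I
// (the patch delegates the cross-hart part to RiscvFlushIcache::flush). In
// ordinary user-space code the same effect is normally obtained with the
// compiler builtin below, which on linux-riscv ends up in the flush-icache
// system call.
#include <cstdint>
#include <cstring>

// Publish freshly written machine code at [buf, buf + len) for execution.
void publish_code(void* buf, const void* src, std::size_t len) {
  std::memcpy(buf, src, len);  // plain data stores of the new instructions
  // Make the stores visible to instruction fetch on all harts.
  __builtin___clear_cache(static_cast<char*>(buf),
                          static_cast<char*>(buf) + len);
}

int main() {
  static unsigned char slab[16];
  const std::uint32_t nop = 0x00000013;   // RISC-V "addi x0, x0, 0"
  publish_code(slab, &nop, sizeof(nop));  // written but never executed here
  return 0;
}
// --------------------------------------------------------------------------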
+ * @@ -16024,2066 +15862,1920 @@ index 00000000000..3c115a2ea02 + */ + +#include "precompiled.hpp" -+#include "classfile/classLoaderData.hpp" ++#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" -+#include "gc/shared/barrierSetNMethod.hpp" -+#include "gc/shared/collectedHeap.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "memory/universe.hpp" -+#include "runtime/jniHandles.hpp" ++#include "interp_masm_riscv.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "logging/log.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/method.hpp" ++#include "oops/methodData.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.hpp" ++#include "runtime/thread.inline.hpp" + -+#define __ masm-> ++void InterpreterMacroAssembler::narrow(Register result) { ++ // Get method->_constMethod->_result_type ++ ld(t0, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++ ld(t0, Address(t0, Method::const_offset())); ++ lbu(t0, Address(t0, ConstMethod::result_type_offset())); + -+void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread) { -+ assert_cond(masm != NULL); ++ Label done, notBool, notByte, notChar; + -+ // RA is live. It must be saved around calls. ++ // common case first ++ mv(t1, T_INT); ++ beq(t0, t1, done); + -+ bool in_heap = (decorators & IN_HEAP) != 0; -+ bool in_native = (decorators & IN_NATIVE) != 0; -+ bool is_not_null = (decorators & IS_NOT_NULL) != 0; -+ switch (type) { -+ case T_OBJECT: // fall through -+ case T_ARRAY: { -+ if (in_heap) { -+ if (UseCompressedOops) { -+ __ lwu(dst, src); -+ if (is_not_null) { -+ __ decode_heap_oop_not_null(dst); -+ } else { -+ __ decode_heap_oop(dst); -+ } -+ } else { -+ __ ld(dst, src); -+ } -+ } else { -+ assert(in_native, "why else?"); -+ __ ld(dst, src); -+ } -+ break; -+ } -+ case T_BOOLEAN: __ load_unsigned_byte (dst, src); break; -+ case T_BYTE: __ load_signed_byte (dst, src); break; -+ case T_CHAR: __ load_unsigned_short(dst, src); break; -+ case T_SHORT: __ load_signed_short (dst, src); break; -+ case T_INT: __ lw (dst, src); break; -+ case T_LONG: __ ld (dst, src); break; -+ case T_ADDRESS: __ ld (dst, src); break; -+ case T_FLOAT: __ flw (f10, src); break; -+ case T_DOUBLE: __ fld (f10, src); break; -+ default: Unimplemented(); ++ // mask integer result to narrower return type. 
++ mv(t1, T_BOOLEAN); ++ bne(t0, t1, notBool); ++ ++ andi(result, result, 0x1); ++ j(done); ++ ++ bind(notBool); ++ mv(t1, T_BYTE); ++ bne(t0, t1, notByte); ++ sign_extend(result, result, 8); ++ j(done); ++ ++ bind(notByte); ++ mv(t1, T_CHAR); ++ bne(t0, t1, notChar); ++ zero_extend(result, result, 16); ++ j(done); ++ ++ bind(notChar); ++ sign_extend(result, result, 16); ++ ++ // Nothing to do for T_INT ++ bind(done); ++ addw(result, result, zr); ++} ++ ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry != NULL, "Entry must have been generated by now"); ++ j(entry); ++} ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, ++ // it means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. ++ lwu(t1, Address(xthread, JavaThread::popframe_condition_offset())); ++ andi(t0, t1, JavaThread::popframe_pending_bit); ++ beqz(t0, L); ++ andi(t0, t1, JavaThread::popframe_processing_bit); ++ bnez(t0, L); ++ // Call Interpreter::remove_activation_preserving_args_entry() to get the ++ // address of the same-named entrypoint in the generated interpreter code. ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(x10); ++ bind(L); + } +} + -+void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2) { -+ assert_cond(masm != NULL); -+ bool in_heap = (decorators & IN_HEAP) != 0; -+ bool in_native = (decorators & IN_NATIVE) != 0; -+ switch (type) { -+ case T_OBJECT: // fall through -+ case T_ARRAY: { -+ val = val == noreg ? 
zr : val; -+ if (in_heap) { -+ if (UseCompressedOops) { -+ assert(!dst.uses(val), "not enough registers"); -+ if (val != zr) { -+ __ encode_heap_oop(val); -+ } -+ __ sw(val, dst); -+ } else { -+ __ sd(val, dst); -+ } -+ } else { -+ assert(in_native, "why else?"); -+ __ sd(val, dst); -+ } ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++ ld(x12, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr(x12, JvmtiThreadState::earlyret_tos_offset()); ++ const Address oop_addr(x12, JvmtiThreadState::earlyret_oop_offset()); ++ const Address val_addr(x12, JvmtiThreadState::earlyret_value_offset()); ++ switch (state) { ++ case atos: ++ ld(x10, oop_addr); ++ sd(zr, oop_addr); ++ verify_oop(x10); + break; -+ } -+ case T_BOOLEAN: -+ __ andi(val, val, 0x1); // boolean is true if LSB is 1 -+ __ sb(val, dst); ++ case ltos: ++ ld(x10, val_addr); + break; -+ case T_BYTE: __ sb(val, dst); break; -+ case T_CHAR: __ sh(val, dst); break; -+ case T_SHORT: __ sh(val, dst); break; -+ case T_INT: __ sw(val, dst); break; -+ case T_LONG: __ sd(val, dst); break; -+ case T_ADDRESS: __ sd(val, dst); break; -+ case T_FLOAT: __ fsw(f10, dst); break; -+ case T_DOUBLE: __ fsd(f10, dst); break; -+ default: Unimplemented(); ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ lwu(x10, val_addr); ++ break; ++ case ftos: ++ flw(f10, val_addr); ++ break; ++ case dtos: ++ fld(f10, val_addr); ++ break; ++ case vtos: ++ /* nothing to do */ ++ break; ++ default: ++ ShouldNotReachHere(); + } ++ // Clean up tos value in the thread object ++ mvw(t0, (int) ilgl); ++ sw(t0, tos_addr); ++ sw(zr, val_addr); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ beqz(t0, L); // if [thread->jvmti_thread_state() == NULL] then exit ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ lwu(t0, Address(t0, JvmtiThreadState::earlyret_state_offset())); ++ mv(t1, JvmtiThreadState::earlyret_pending); ++ bne(t0, t1, L); + ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. ++ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ lwu(t0, Address(t0, JvmtiThreadState::earlyret_tos_offset())); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), t0); ++ jr(x10); ++ bind(L); ++ } +} + -+void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath) { -+ assert_cond(masm != NULL); -+ // If mask changes we need to ensure that the inverse is still encodable as an immediate -+ STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); -+ __ andi(obj, obj, ~JNIHandles::weak_tag_mask); -+ __ ld(obj, Address(obj, 0)); // *obj ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ lhu(reg, Address(xbcp, bcp_offset)); ++ revb_h(reg, reg); +} + -+// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. 
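// --------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): narrow() earlier in this file
// clips the 64-bit result register to the method's declared return type:
// booleans keep only bit 0, bytes and shorts are sign-extended from 8/16 bits,
// chars are zero-extended from 16 bits, and every case is finally sign-extended
// from 32 bits by the trailing addw. A plain-C++ model of the same rules:
#include <cstdint>

enum class ResultType { Boolean, Byte, Char, Short, Int };

// Models the narrowing performed by InterpreterMacroAssembler::narrow().
std::int64_t narrow_result(ResultType type, std::int64_t raw) {
  switch (type) {
    case ResultType::Boolean: return raw & 0x1;                         // keep bit 0
    case ResultType::Byte:    return static_cast<std::int8_t>(raw);     // sign-extend 8
    case ResultType::Char:    return static_cast<std::uint16_t>(raw);   // zero-extend 16
    case ResultType::Short:   return static_cast<std::int16_t>(raw);    // sign-extend 16
    case ResultType::Int:     return static_cast<std::int32_t>(raw);    // sign-extend 32
  }
  return raw;
}

int main() {
  return narrow_result(ResultType::Char, 0x1FFFF) == 0xFFFF ? 0 : 1;
}
// --------------------------------------------------------------------------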
-+void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1, -+ Register tmp2, -+ Label& slow_case, -+ bool is_far) { -+ assert_cond(masm != NULL); -+ assert_different_registers(obj, tmp2); -+ assert_different_registers(obj, var_size_in_bytes); -+ Register end = tmp2; ++void InterpreterMacroAssembler::get_dispatch() { ++ int32_t offset = 0; ++ la_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset); ++ addi(xdispatch, xdispatch, offset); ++} + -+ __ ld(obj, Address(xthread, JavaThread::tlab_top_offset())); -+ if (var_size_in_bytes == noreg) { -+ __ la(end, Address(obj, con_size_in_bytes)); ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ load_unsigned_short(index, Address(xbcp, bcp_offset)); ++ } else if (index_size == sizeof(u4)) { ++ lwu(index, Address(xbcp, bcp_offset)); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ xori(index, index, -1); ++ addw(index, index, zr); ++ } else if (index_size == sizeof(u1)) { ++ load_unsigned_byte(index, Address(xbcp, bcp_offset)); + } else { -+ __ add(end, obj, var_size_in_bytes); ++ ShouldNotReachHere(); + } -+ __ ld(t0, Address(xthread, JavaThread::tlab_end_offset())); -+ __ bgtu(end, t0, slow_case, is_far); ++} + -+ // update the tlab top pointer -+ __ sd(end, Address(xthread, JavaThread::tlab_top_offset())); ++// Return ++// Rindex: index into constant pool ++// Rcache: address of cache entry - ConstantPoolCache::base_offset() ++// ++// A caller must add ConstantPoolCache::base_offset() to Rcache to get ++// the true address of the cache entry. ++// ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ assert_different_registers(cache, xcpool); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // Convert from field index to ConstantPoolCacheEntry ++ // riscv already has the cache in xcpool so there is no need to ++ // install it in cache. Instead we pre-add the indexed offset to ++ // xcpool and return it in cache. All clients of this method need to ++ // be modified accordingly. ++ shadd(cache, index, xcpool, cache, 5); ++} + -+ // recover var_size_in_bytes if necessary -+ if (var_size_in_bytes == end) { -+ __ sub(var_size_in_bytes, var_size_in_bytes, obj); -+ } ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. ++ // n.b. 
unlike x86 cache already includes the index offset ++ la(bytecode, Address(cache, ++ ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::indices_offset())); ++ membar(MacroAssembler::AnyAny); ++ lwu(bytecode, bytecode); ++ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte)); ++ srli(bytecode, bytecode, XLEN - BitsPerByte); +} + -+// Defines obj, preserves var_size_in_bytes -+void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1, -+ Label& slow_case, -+ bool is_far) { -+ assert_cond(masm != NULL); -+ assert_different_registers(obj, var_size_in_bytes, tmp1); -+ if (!Universe::heap()->supports_inline_contig_alloc()) { -+ __ j(slow_case); -+ } else { -+ Register end = tmp1; -+ Label retry; -+ __ bind(retry); ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // Convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, ++ "else change next line"); ++ ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); ++ // skip past the header ++ add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ // construct pointer to cache entry ++ shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord); ++} + -+ // Get the current end of the heap -+ ExternalAddress address_end((address) Universe::heap()->end_addr()); -+ { -+ int32_t offset; -+ __ la_patchable(t1, address_end, offset); -+ __ ld(t1, Address(t1, offset)); -+ } ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index, Register tmp) { ++ assert_different_registers(result, index); + -+ // Get the current top of the heap -+ ExternalAddress address_top((address) Universe::heap()->top_addr()); -+ { -+ int32_t offset; -+ __ la_patchable(t0, address_top, offset); -+ __ addi(t0, t0, offset); -+ __ lr_d(obj, t0, Assembler::aqrl); -+ } ++ get_constant_pool(result); ++ // Load pointer for resolved_references[] objArray ++ ld(result, Address(result, ConstantPool::cache_offset_in_bytes())); ++ ld(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes())); ++ resolve_oop_handle(result, tmp); ++ // Add in the index ++ addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); ++ shadd(result, index, result, index, LogBytesPerHeapOop); ++ load_heap_oop(result, Address(result, 0)); ++} + -+ // Adjust it my the size of our new object -+ if (var_size_in_bytes == noreg) { -+ __ la(end, Address(obj, con_size_in_bytes)); -+ } else { -+ __ add(end, obj, var_size_in_bytes); -+ } ++void InterpreterMacroAssembler::load_resolved_klass_at_offset( ++ Register cpool, Register index, Register klass, Register temp) { ++ shadd(temp, index, cpool, temp, LogBytesPerWord); ++ lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index ++ ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses 
++ shadd(klass, temp, klass, temp, LogBytesPerWord); ++ ld(klass, Address(klass, Array::base_offset_in_bytes())); ++} + -+ // if end < obj then we wrapped around high memory -+ __ bltu(end, obj, slow_case, is_far); ++// Generate a subtype check: branch to ok_is_subtype if sub_klass is a ++// subtype of super_klass. ++// ++// Args: ++// x10: superklass ++// Rsub_klass: subklass ++// ++// Kills: ++// x12, x15 ++void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, ++ Label& ok_is_subtype) { ++ assert(Rsub_klass != x10, "x10 holds superklass"); ++ assert(Rsub_klass != x12, "x12 holds 2ndary super array length"); ++ assert(Rsub_klass != x15, "x15 holds 2ndary super array scan ptr"); + -+ __ bgtu(end, t1, slow_case, is_far); ++ // Profile the not-null value's klass. ++ profile_typecheck(x12, Rsub_klass, x15); // blows x12, reloads x15 + -+ // If heap_top hasn't been changed by some other thread, update it. -+ __ sc_d(t1, end, t0, Assembler::rl); -+ __ bnez(t1, retry); ++ // Do the check. ++ check_klass_subtype(Rsub_klass, x10, x12, ok_is_subtype); // blows x12 + -+ incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1); -+ } ++ // Profile the failure of the check. ++ profile_typecheck_failed(x12); // blows x12 +} + -+void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1) { -+ assert_cond(masm != NULL); -+ assert(tmp1->is_valid(), "need temp reg"); ++// Java Expression Stack + -+ __ ld(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); -+ if (var_size_in_bytes->is_valid()) { -+ __ add(tmp1, tmp1, var_size_in_bytes); -+ } else { -+ __ add(tmp1, tmp1, con_size_in_bytes); -+ } -+ __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld(r, Address(esp, 0)); ++ addi(esp, esp, wordSize); +} + -+void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { -+ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); -+ -+ if (bs_nm == NULL) { -+ return; -+ } ++void InterpreterMacroAssembler::pop_i(Register r) { ++ lw(r, Address(esp, 0)); // lw do signed extended ++ addi(esp, esp, wordSize); ++} + -+ // RISCV atomic operations require that the memory address be naturally aligned. -+ __ align(4); ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld(r, Address(esp, 0)); ++ addi(esp, esp, 2 * Interpreter::stackElementSize); ++} + -+ Label skip, guard; -+ Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset())); ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ addi(esp, esp, -wordSize); ++ sd(r, Address(esp, 0)); ++} + -+ __ lwu(t0, guard); ++void InterpreterMacroAssembler::push_i(Register r) { ++ addi(esp, esp, -wordSize); ++ addw(r, r, zr); // signed extended ++ sd(r, Address(esp, 0)); ++} + -+ // Subsequent loads of oops must occur after load of guard value. -+ // BarrierSetNMethod::disarm sets guard with release semantics. 
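// --------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the push_/pop_ helpers above
// treat the interpreter's expression stack as word-sized slots growing toward
// lower addresses. Category-1 values (int, float, reference) take one slot;
// longs and doubles take two stack elements, and push_l zeroes the unused
// filler slot. A toy model of those slot moves, with the vector's back()
// playing the role of the stack top:
#include <cassert>
#include <cstdint>
#include <vector>

struct ExprStack {
  std::vector<std::int64_t> slots;

  void push_int(std::int32_t v)  { slots.push_back(v); }  // one slot
  void push_long(std::int64_t v) {                        // two slots
    slots.push_back(0);                                   // filler, kept zero
    slots.push_back(v);                                   // value on top
  }
  std::int32_t pop_int() {
    std::int32_t v = static_cast<std::int32_t>(slots.back());
    slots.pop_back();
    return v;
  }
  std::int64_t pop_long() {
    std::int64_t v = slots.back();
    slots.pop_back();
    slots.pop_back();                                      // drop the filler too
    return v;
  }
};

int main() {
  ExprStack s;
  s.push_int(-1);
  s.push_long(std::int64_t{1} << 40);
  assert(s.pop_long() == (std::int64_t{1} << 40));
  assert(s.pop_int() == -1);
  return 0;
}
// --------------------------------------------------------------------------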
-+ __ membar(MacroAssembler::LoadLoad); -+ __ lwu(t1, thread_disarmed_addr); -+ __ beq(t0, t1, skip); ++void InterpreterMacroAssembler::push_l(Register r) { ++ addi(esp, esp, -2 * wordSize); ++ sd(zr, Address(esp, wordSize)); ++ sd(r, Address(esp)); ++} + -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, StubRoutines::riscv::method_entry_barrier(), offset); -+ __ jalr(ra, t0, offset); -+ __ j(skip); ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ flw(r, esp, 0); ++ addi(esp, esp, wordSize); ++} + -+ __ bind(guard); ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ fld(r, esp, 0); ++ addi(esp, esp, 2 * Interpreter::stackElementSize); ++} + -+ assert(__ offset() % 4 == 0, "bad alignment"); -+ __ emit_int32(0); // nmethod guard value. Skipped over in common case. ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ addi(esp, esp, -wordSize); ++ fsw(r, Address(esp, 0)); ++} + -+ __ bind(skip); ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ addi(esp, esp, -2 * wordSize); ++ fsd(r, Address(esp, 0)); +} + -+void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { -+ BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod(); -+ if (bs == NULL) { -+ return; ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: ++ pop_ptr(); ++ verify_oop(x10); ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ pop_i(); ++ break; ++ case ltos: ++ pop_l(); ++ break; ++ case ftos: ++ pop_f(); ++ break; ++ case dtos: ++ pop_d(); ++ break; ++ case vtos: ++ /* nothing to do */ ++ break; ++ default: ++ ShouldNotReachHere(); + } ++} + -+ Label bad_call; -+ __ beqz(xmethod, bad_call); ++void InterpreterMacroAssembler::push(TosState state) { ++ switch (state) { ++ case atos: ++ verify_oop(x10); ++ push_ptr(); ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ push_i(); ++ break; ++ case ltos: ++ push_l(); ++ break; ++ case ftos: ++ push_f(); ++ break; ++ case dtos: ++ push_d(); ++ break; ++ case vtos: ++ /* nothing to do */ ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} + -+ // Pointer chase to the method holder to find out if the method is concurrently unloading. -+ Label method_live; -+ __ load_method_holder_cld(t0, xmethod); ++// Helpers for swap and dup ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); ++} + -+ // Is it a strong CLD? -+ __ lwu(t1, Address(t0, ClassLoaderData::keep_alive_offset())); -+ __ bnez(t1, method_live); ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ sd(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); ++} + -+ // Is it a weak but alive CLD? -+ __ push_reg(RegSet::of(x28, x29), sp); ++void InterpreterMacroAssembler::load_float(Address src) { ++ flw(f10, src); ++} + -+ __ ld(x28, Address(t0, ClassLoaderData::holder_offset())); ++void InterpreterMacroAssembler::load_double(Address src) { ++ fld(f10, src); ++} + -+ // Uses x28 & x29, so we must pass new temporaries. 
-+ __ resolve_weak_handle(x28, x29); -+ __ mv(t0, x28); ++void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { ++ // set sender sp ++ mv(x30, sp); ++ // record last_sp ++ sd(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++} + -+ __ pop_reg(RegSet::of(x28, x29), sp); ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method) { ++ prepare_to_jump_from_interpreted(); ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); ++ beqz(t0, run_compiled_code); ++ ld(t0, Address(method, Method::interpreter_entry_offset())); ++ jr(t0); ++ bind(run_compiled_code); ++ } + -+ __ bnez(t0, method_live); -+ -+ __ bind(bad_call); ++ ld(t0, Address(method, Method::from_interpreted_offset())); ++ jr(t0); ++} + -+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -+ __ bind(method_live); ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. amd64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { +} -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp -new file mode 100644 -index 00000000000..b85f7f5582b ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp -@@ -0,0 +1,79 @@ -+/* -+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ + -+#ifndef CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetNMethod.hpp" -+#include "memory/allocation.hpp" -+#include "oops/access.hpp" ++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll, ++ Register Rs) { ++ // Pay attention to the argument Rs, which is acquiesce in t0. ++ if (VerifyActivationFrameSize) { ++ Unimplemented(); ++ } ++ if (verifyoop && state == atos) { ++ verify_oop(x10); ++ } + -+class BarrierSetAssembler: public CHeapObj { -+private: -+ void incr_allocated_bytes(MacroAssembler* masm, -+ Register var_size_in_bytes, int con_size_in_bytes, -+ Register t1 = noreg); ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && ++ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; + -+public: -+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs) {} -+ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register start, Register end, Register tmp, RegSet saved_regs) {} -+ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread); -+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2); ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ andi(t1, t1, SafepointMechanism::poll_bit()); ++ bnez(t1, safepoint); ++ } ++ if (table == Interpreter::dispatch_table(state)) { ++ li(t1, Interpreter::distance_from_dispatch_table(state)); ++ add(t1, Rs, t1); ++ shadd(t1, t1, xdispatch, t1, 3); ++ } else { ++ mv(t1, (address)table); ++ shadd(t1, Rs, t1, Rs, 3); ++ } ++ ld(t1, Address(t1)); ++ jr(t1); + -+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath); ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ la(t1, ExternalAddress((address)safepoint_table)); ++ shadd(t1, Rs, t1, Rs, 3); ++ ld(t1, Address(t1)); ++ jr(t1); ++ } ++} + -+ virtual void tlab_allocate(MacroAssembler* masm, -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Register tmp2, // temp register -+ Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false -+ ); ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll, Register Rs) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll, Rs); ++} + -+ void eden_allocate(MacroAssembler* masm, -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // 
object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false -+ ); -+ virtual void barrier_stubs_init() {} ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state, Register Rs) { ++ dispatch_base(state, Interpreter::normal_table(state), Rs); ++} + -+ virtual void nmethod_entry_barrier(MacroAssembler* masm); -+ virtual void c2i_entry_barrier(MacroAssembler* masm); -+ virtual ~BarrierSetAssembler() {} -+}; ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state, Register Rs) { ++ dispatch_base(state, Interpreter::normal_table(state), false, Rs); ++} + -+#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp -new file mode 100644 -index 00000000000..ae7ee4c5a44 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp -@@ -0,0 +1,171 @@ -+/* -+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode ++ load_unsigned_byte(t0, Address(xbcp, step)); ++ add(xbcp, xbcp, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} + -+#include "precompiled.hpp" -+#include "code/codeCache.hpp" -+#include "code/nativeInst.hpp" -+#include "gc/shared/barrierSetNMethod.hpp" -+#include "logging/log.hpp" -+#include "memory/resourceArea.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/registerMap.hpp" -+#include "runtime/thread.hpp" -+#include "utilities/align.hpp" -+#include "utilities/debug.hpp" ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ lbu(t0, Address(xbcp, 0)); ++ dispatch_base(state, table); ++} + -+class NativeNMethodBarrier: public NativeInstruction { -+ address instruction_address() const { return addr_at(0); } ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. 
++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers x13 may be in use for the ++ // result check if synchronized method ++ Label unlocked, unlock, no_unlock; + -+ int *guard_addr() { -+ /* auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j */ -+ return reinterpret_cast(instruction_address() + 12 * 4); -+ } ++ // get the value of _do_not_unlock_if_synchronized into x13 ++ const Address do_not_unlock_if_synchronized(xthread, ++ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ lbu(x13, do_not_unlock_if_synchronized); ++ sb(zr, do_not_unlock_if_synchronized); // reset the flag + -+public: -+ int get_value() { -+ return Atomic::load_acquire(guard_addr()); -+ } ++ // get method access flags ++ ld(x11, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++ ld(x12, Address(x11, Method::access_flags_offset())); ++ andi(t0, x12, JVM_ACC_SYNCHRONIZED); ++ beqz(t0, unlocked); + -+ void set_value(int value) { -+ Atomic::release_store(guard_addr(), value); -+ } ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag ++ // is set. ++ bnez(x13, no_unlock); + -+ void verify() const; -+}; ++ // unlock monitor ++ push(state); // save result + -+// Store the instruction bitmask, bits and name for checking the barrier. -+struct CheckInsn { -+ uint32_t mask; -+ uint32_t bits; -+ const char *name; -+}; ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ const Address monitor(fp, frame::interpreter_frame_initial_sp_offset * ++ wordSize - (int) sizeof(BasicObjectLock)); ++ // We use c_rarg1 so that if we go slow path it will be the correct ++ // register for unlock_object to pass to VM directly ++ la(c_rarg1, monitor); // address of first monitor + -+static const struct CheckInsn barrierInsn[] = { -+ { 0x00000fff, 0x00000297, "auipc t0, 0 "}, -+ { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) "}, -+ { 0xffffffff, 0x0aa0000f, "fence ir, ir "}, -+ { 0x000fffff, 0x000be303, "lwu t1, 112(xthread)"}, -+ { 0x01fff07f, 0x00628063, "beq t0, t1, skip "}, -+ { 0x00000fff, 0x000002b7, "lui t0, imm0 "}, -+ { 0x000fffff, 0x00028293, "addi t0, t0, imm1 "}, -+ { 0xffffffff, 0x00b29293, "slli t0, t0, 11 "}, -+ { 0x000fffff, 0x00028293, "addi t0, t0, imm2 "}, -+ { 0xffffffff, 0x00529293, "slli t0, t0, 5 "}, -+ { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) "}, -+ { 0x00000fff, 0x0000006f, "j skip "} -+ /* guard: */ -+ /* 32bit nmethod guard value */ -+ /* skip: */ -+}; ++ ld(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); ++ bnez(x10, unlock); + -+// The encodings must match the instructions emitted by -+// BarrierSetAssembler::nmethod_entry_barrier. The matching ignores the specific -+// register numbers and immediate values in the encoding. 
-+void NativeNMethodBarrier::verify() const { -+ intptr_t addr = (intptr_t) instruction_address(); -+ for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) { -+ uint32_t inst = *((uint32_t*) addr); -+ if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) { -+ tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst); -+ fatal("not an %s instruction.", barrierInsn[i].name); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. ++ if (install_monitor_exception) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); + } -+ addr += 4; ++ j(unlocked); + } -+} -+ + -+/* We're called from an nmethod when we need to deoptimize it. We do -+ this by throwing away the nmethod's frame and jumping to the -+ ic_miss stub. This looks like there has been an IC miss at the -+ entry of the nmethod, so we resolve the call, which will fall back -+ to the interpreter if the nmethod has been unloaded. */ -+void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { ++ bind(unlock); ++ unlock_object(c_rarg1); ++ pop(state); + -+ typedef struct { -+ intptr_t *sp; intptr_t *fp; address ra; address pc; -+ } frame_pointers_t; ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); + -+ frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5); ++ // x10: Might contain return value + -+ JavaThread *thread = JavaThread::current(); -+ RegisterMap reg_map(thread, false); -+ frame frame = thread->last_frame(); ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top( ++ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const Address monitor_block_bot( ++ fp, frame::interpreter_frame_initial_sp_offset * wordSize); + -+ assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be"); -+ assert(frame.cb() == nm, "must be"); -+ frame = frame.sender(®_map); ++ bind(restart); ++ // We use c_rarg1 so that if we go slow path it will be the correct ++ // register for unlock_object to pass to VM directly ++ ld(c_rarg1, monitor_block_top); // points to current entry, starting ++ // with top-most entry ++ la(x9, monitor_block_bot); // points to word before bottom of ++ // monitor block + -+ LogTarget(Trace, nmethod, barrier) out; -+ if (out.is_enabled()) { -+ ResourceMark mark; -+ log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p", -+ nm->method()->name_and_sig_as_C_string(), -+ nm, *(address *) return_address_ptr, nm->is_osr_method(), thread, -+ thread->name(), frame.sp(), nm->verified_entry_point()); -+ } ++ j(entry); + -+ new_frame->sp = frame.sp(); -+ new_frame->fp = frame.fp(); -+ new_frame->ra = frame.pc(); -+ new_frame->pc = SharedRuntime::get_handle_wrong_method_stub(); -+} ++ // Entry already locked, need to throw exception ++ bind(exception); + -+// This is the offset of the entry barrier from where the frame is completed. 
-+// If any code changes between the end of the verified entry where the entry -+// barrier resides, and the completion of the frame, then -+// NativeNMethodCmpBarrier::verify() will immediately complain when it does -+// not find the expected native instruction at this offset, which needs updating. -+// Note that this offset is invariant of PreserveFramePointer. ++ if (throw_monitor_exception) { ++ // Throw exception ++ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime:: ++ throw_illegal_monitor_state_exception)); + -+// see BarrierSetAssembler::nmethod_entry_barrier -+// auipc + lwu + fence + lwu + beq + movptr_with_offset(5 instructions) + jalr + j + int32 -+static const int entry_barrier_offset = -4 * 13; ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception. ++ // Unlock does not block, so don't have to worry about the frame. ++ // We don't have to preserve c_rarg1 since we are going to throw an exception. + -+static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) { -+ address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset; -+ NativeNMethodBarrier* barrier = reinterpret_cast(barrier_address); -+ debug_only(barrier->verify()); -+ return barrier; -+} ++ push(state); ++ unlock_object(c_rarg1); ++ pop(state); + -+void BarrierSetNMethod::disarm(nmethod* nm) { -+ if (!supports_entry_barrier(nm)) { -+ return; -+ } ++ if (install_monitor_exception) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ new_illegal_monitor_state_exception)); ++ } + -+ // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier. -+ NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); ++ j(restart); ++ } + -+ barrier->set_value(disarmed_value()); -+} ++ bind(loop); ++ // check if current entry is used ++ add(t0, c_rarg1, BasicObjectLock::obj_offset_in_bytes()); ++ ld(t0, Address(t0, 0)); ++ bnez(t0, exception); + -+bool BarrierSetNMethod::is_armed(nmethod* nm) { -+ if (!supports_entry_barrier(nm)) { -+ return false; ++ add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg1, x9, loop); // check if bottom reached if not at bottom then check this entry + } + -+ NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); -+ return barrier->get_value() != disarmed_value(); -+} -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -new file mode 100644 -index 00000000000..a419f92b5f6 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,111 @@ -+/* -+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). 
-+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/cardTable.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" -+#include "gc/shared/cardTableBarrierSetAssembler.hpp" -+#include "gc/shared/gc_globals.hpp" -+#include "interpreter/interp_masm.hpp" -+ -+#define __ masm-> ++ bind(no_unlock); + ++ // jvmti support ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA + -+void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) { -+ assert_cond(masm != NULL); -+ assert_different_registers(obj, tmp); -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } + -+ __ srli(obj, obj, CardTable::card_shift()); ++ // remove activation ++ // get sender esp ++ ld(t1, ++ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; + -+ assert(CardTable::dirty_card_val() == 0, "must be"); ++ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); ++ ble(t1, t0, no_reserved_zone_enabling); + -+ __ load_byte_map_base(tmp); -+ __ add(tmp, obj, tmp); ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), xthread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); + -+ if (UseCondCardMark) { -+ Label L_already_dirty; -+ __ membar(MacroAssembler::StoreLoad); -+ __ lbu(t1, Address(tmp)); -+ __ beqz(t1, L_already_dirty); -+ __ sb(zr, Address(tmp)); -+ __ bind(L_already_dirty); -+ } else { -+ __ sb(zr, Address(tmp)); ++ bind(no_reserved_zone_enabling); + } -+} + -+void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs) { -+ assert_cond(masm != NULL); -+ assert_different_registers(start, tmp); -+ assert_different_registers(count, tmp); ++ // restore sender esp ++ mv(esp, t1); + -+ Label L_loop, L_done; -+ const Register end = count; ++ // remove frame anchor ++ leave(); ++ // If we're returning to interpreted code we will shortly be ++ // adjusting SP to allow some space for ESP. If we're returning to ++ // compiled code the saved sender SP was saved in sender_sp, so this ++ // restores it. ++ andi(sp, esp, -16); ++} + -+ __ beqz(count, L_done); // zero count - nothing to do -+ // end = start + count << LogBytesPerHeapOop -+ __ shadd(end, count, start, count, LogBytesPerHeapOop); -+ __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive ++// Lock object ++// ++// Args: ++// c_rarg1: BasicObjectLock to be used for locking ++// ++// Kills: ++// x10 ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. 
(param regs) ++// t0, t1 (temp regs) ++void InterpreterMacroAssembler::lock_object(Register lock_reg) ++{ ++ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1"); ++ if (UseHeavyMonitors) { ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), ++ lock_reg); ++ } else { ++ Label done; + -+ __ srli(start, start, CardTable::card_shift()); -+ __ srli(end, end, CardTable::card_shift()); -+ __ sub(count, end, start); // number of bytes to copy ++ const Register swap_reg = x10; ++ const Register tmp = c_rarg2; ++ const Register obj_reg = c_rarg3; // Will contain the oop + -+ __ load_byte_map_base(tmp); -+ __ add(start, start, tmp); ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + ++ BasicLock::displaced_header_offset_in_bytes(); + -+ __ bind(L_loop); -+ __ add(tmp, start, count); -+ __ sb(zr, Address(tmp)); -+ __ sub(count, count, 1); -+ __ bgez(count, L_loop); -+ __ bind(L_done); -+} ++ Label slow_case; + -+void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2) { -+ bool in_heap = (decorators & IN_HEAP) != 0; -+ bool is_array = (decorators & IS_ARRAY) != 0; -+ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; -+ bool precise = is_array || on_anonymous; ++ // Load object pointer into obj_reg c_rarg3 ++ ld(obj_reg, Address(lock_reg, obj_offset)); + -+ bool needs_post_barrier = val != noreg && in_heap; -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); -+ if (needs_post_barrier) { -+ // flatten object address if needed -+ if (!precise || dst.offset() == 0) { -+ store_check(masm, dst.base(), x13); -+ } else { -+ assert_cond(masm != NULL); -+ __ la(x13, dst); -+ store_check(masm, x13, t0); ++ if (UseBiasedLocking) { ++ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); + } -+ } -+} -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp -new file mode 100644 -index 00000000000..686fe8fa478 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,42 @@ -+/* -+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP ++ // Load (object->mark() | 1) into swap_reg ++ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ ori(swap_reg, t0, 1); + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ sd(swap_reg, Address(lock_reg, mark_offset)); + -+class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { -+protected: -+ void store_check(MacroAssembler* masm, Register obj, Register tmp); ++ assert(lock_offset == 0, ++ "displached header must be first word in BasicObjectLock"); + -+ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs); -+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2); -+}; ++ if (PrintBiasedLockingStatistics) { ++ Label fail, fast; ++ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail); ++ bind(fast); ++ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), ++ t1, t0); ++ j(done); ++ bind(fail); ++ } else { ++ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++ } + -+#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp -new file mode 100644 -index 00000000000..7aa2015f9ec ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 7) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (7 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 3 bits clear. 
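[Editorial aside, not part of the patch.] A simplified stand-alone version of the recursion test described above, with hypothetical names: assuming the page size is a power of two and both sp and the page size have their low 3 bits clear, ((mark - sp) & (7 - page_size)) is zero exactly when mark is 8-byte aligned and lies in [sp, sp + page_size), i.e. when the displaced mark is a pointer into the current stack page and the lock is a recursive stack lock.

  #include <cstdint>

  // Hypothetical helper: true if 'mark' looks like a pointer into the current
  // thread's stack page, meaning this is a recursive stack lock.
  static bool is_recursive_stack_lock(uintptr_t mark, uintptr_t sp, uintptr_t page_size) {
    // Unsigned arithmetic: if mark < sp, the subtraction wraps and the high
    // bits of the result are set, so the masked test fails as intended.
    return ((mark - sp) & (7 - page_size)) == 0;
  }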
++ // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg ++ sub(swap_reg, swap_reg, sp); ++ li(t0, (int64_t)(7 - os::vm_page_size())); ++ andr(swap_reg, swap_reg, t0); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ // Save the test result, for recursive case, the result is zero ++ sd(swap_reg, Address(lock_reg, mark_offset)); + -+#define __ masm-> ++ if (PrintBiasedLockingStatistics) { ++ bnez(swap_reg, slow_case); ++ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), ++ t1, t0); ++ } ++ beqz(swap_reg, done); + -+void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs) { ++ bind(slow_case); + -+ if (is_oop) { -+ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); -+ } -+} ++ // Call the runtime routine for slow case ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), ++ lock_reg); + -+void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register start, Register count, Register tmp, -+ RegSet saved_regs) { -+ if (is_oop) { -+ gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp, saved_regs); ++ bind(done); + } +} + -+void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2) { -+ if (is_reference_type(type)) { -+ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg1: BasicObjectLock for lock ++// ++// Kills: ++// x10 ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) ++// t0, t1 (temp regs) ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) ++{ ++ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); + } else { -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); -+ } -+} -diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp -new file mode 100644 -index 00000000000..00419c3163c ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). 
-+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ Label done; + -+#ifndef CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP ++ const Register swap_reg = x10; ++ const Register header_reg = c_rarg2; // Will contain the old oopMark ++ const Register obj_reg = c_rarg3; // Will contain the oop + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" ++ save_bcp(); // Save in case of exception + -+// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other -+// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected -+// accesses, which are overridden in the concrete BarrierSetAssembler. ++ // Convert from BasicObjectLock structure to object and BasicLock ++ // structure Store the BasicLock address into x10 ++ la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + -+class ModRefBarrierSetAssembler: public BarrierSetAssembler { -+protected: -+ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register addr, Register count, RegSet saved_regs) {} -+ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs) {} ++ // Load oop into obj_reg(c_rarg3) ++ ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + -+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2) = 0; ++ // Free entry ++ sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + -+public: -+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs); -+ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register start, Register count, Register tmp, RegSet saved_regs); -+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2); -+}; ++ if (UseBiasedLocking) { ++ biased_locking_exit(obj_reg, header_reg, done); ++ } + -+#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -new file mode 100644 -index 00000000000..cd568cc723f ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -@@ -0,0 +1,117 @@ -+/* -+ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // Load the old header from BasicLock structure ++ ld(header_reg, Address(swap_reg, ++ BasicLock::displaced_header_offset_in_bytes())); + -+#include "precompiled.hpp" -+#include "c1/c1_LIRAssembler.hpp" -+#include "c1/c1_MacroAssembler.hpp" -+#include "gc/shared/gc_globals.hpp" -+#include "gc/shenandoah/shenandoahBarrierSet.hpp" -+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -+#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" ++ // Test for recursion ++ beqz(header_reg, done); + -+#define __ masm->masm()-> ++ // Atomic swap back the old header ++ cmpxchg_obj_header(swap_reg, header_reg, obj_reg, t0, done, /*fallthrough*/NULL); + -+void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { -+ Register addr = _addr->as_register_lo(); -+ Register newval = _new_value->as_register(); -+ Register cmpval = _cmp_value->as_register(); -+ Register tmp1 = _tmp1->as_register(); -+ Register tmp2 = _tmp2->as_register(); -+ Register result = result_opr()->as_register(); ++ // Call the runtime routine for slow case. ++ sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); + -+ ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), newval, t1); ++ bind(done); + -+ if (UseCompressedOops) { -+ __ encode_heap_oop(tmp1, cmpval); -+ cmpval = tmp1; -+ __ encode_heap_oop(tmp2, newval); -+ newval = tmp2; ++ restore_bcp(); + } -+ -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq, -+ /* release */ Assembler::rl, /* is_cae */ false, result); +} + -+#undef __ -+ -+#ifdef ASSERT -+#define __ gen->lir(__FILE__, __LINE__)-> -+#else -+#define __ gen->lir()-> -+#endif + -+LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) { -+ BasicType bt = access.type(); -+ if (access.is_oop()) { -+ LIRGenerator *gen = access.gen(); -+ if (ShenandoahSATBBarrier) { -+ pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(), -+ LIR_OprFact::illegalOpr /* pre_val */); -+ } -+ if (ShenandoahCASBarrier) { -+ cmp_value.load_item(); -+ new_value.load_item(); ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ beqz(mdp, zero_continue); ++} + -+ LIR_Opr tmp1 = gen->new_register(T_OBJECT); -+ LIR_Opr tmp2 = gen->new_register(T_OBJECT); -+ LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base(); -+ LIR_Opr result = gen->new_register(T_INT); ++// Set the method data pointer for the current bcp. 
++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ push_reg(0xc00, sp); // save x10, x11 + -+ __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), tmp1, tmp2, result)); -+ return result; -+ } -+ } -+ return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value); ++ // Test MDO to avoid the call if it is NULL. ++ ld(x10, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ beqz(x10, set_mdp); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), xmethod, xbcp); ++ // x10: mdi ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ la(x11, Address(x11, in_bytes(MethodData::data_offset()))); ++ add(x10, x11, x10); ++ sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ bind(set_mdp); ++ pop_reg(0xc00, sp); +} + -+LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) { -+ LIRGenerator* gen = access.gen(); -+ BasicType type = access.type(); ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ add(sp, sp, -4 * wordSize); ++ sd(x10, Address(sp, 0)); ++ sd(x11, Address(sp, wordSize)); ++ sd(x12, Address(sp, 2 * wordSize)); ++ sd(x13, Address(sp, 3 * wordSize)); ++ test_method_data_pointer(x13, verify_continue); // If mdp is zero, continue ++ get_method(x11); + -+ LIR_Opr result = gen->new_register(type); -+ value.load_item(); -+ LIR_Opr value_opr = value.result(); ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
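[Editorial aside, not part of the patch.] The loads that follow implement the consistency check stated in the comment above: the profile entry records the bci it was created for, so rebuilding "bytecode base + recorded bci" must land on the current bcp. A simplified sketch with hypothetical stand-ins for DataLayout and ConstMethod:

  #include <cstdint>

  struct ProfileRecord { uint16_t bci; };   // stand-in for the DataLayout header

  // Hypothetical check: method bytecode base plus the recorded bci must equal
  // the interpreter's current bytecode pointer.
  static bool mdp_consistent_with_bcp(const ProfileRecord* mdp,
                                      const uint8_t* codes_base,
                                      const uint8_t* bcp) {
    return codes_base + mdp->bci == bcp;
  }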
++ lh(x12, Address(x13, in_bytes(DataLayout::bci_offset()))); ++ ld(t0, Address(x11, Method::const_offset())); ++ add(x12, x12, t0); ++ la(x12, Address(x12, ConstMethod::codes_offset())); ++ beq(x12, xbcp, verify_continue); ++ // x10: method ++ // xbcp: bcp // xbcp == 22 ++ // x13: mdp ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), ++ x11, xbcp, x13); ++ bind(verify_continue); ++ ld(x10, Address(sp, 0)); ++ ld(x11, Address(sp, wordSize)); ++ ld(x12, Address(sp, 2 * wordSize)); ++ ld(x13, Address(sp, 3 * wordSize)); ++ add(sp, sp, 4 * wordSize); ++#endif // ASSERT ++} + -+ if (access.is_oop()) { -+ value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators()); -+ } + -+ assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type"); -+ LIR_Opr tmp = gen->new_register(T_INT); -+ __ xchg(access.resolved_addr(), value_opr, result, tmp); ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ sd(value, data); ++} + -+ if (access.is_oop()) { -+ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators()); -+ LIR_Opr tmp_opr = gen->new_register(type); -+ __ move(result, tmp_opr); -+ result = tmp_opr; -+ if (ShenandoahSATBBarrier) { -+ pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr, -+ result /* pre_val */); -+ } -+ } + -+ return result; ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ increment_mdp_data_at(mdp_in, noreg, constant, decrement); +} -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -new file mode 100644 -index 00000000000..d0ac6e52436 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,712 @@ -+/* -+ * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "gc/shenandoah/shenandoahBarrierSet.hpp" -+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -+#include "gc/shenandoah/shenandoahForwarding.hpp" -+#include "gc/shenandoah/shenandoahHeap.inline.hpp" -+#include "gc/shenandoah/shenandoahHeapRegion.hpp" -+#include "gc/shenandoah/shenandoahRuntime.hpp" -+#include "gc/shenandoah/shenandoahThreadLocalData.hpp" -+#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/thread.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_LIRAssembler.hpp" -+#include "c1/c1_MacroAssembler.hpp" -+#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" -+#endif + -+#define __ masm-> ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow + -+void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs) { -+ if (is_oop) { -+ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; -+ if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { ++ assert_different_registers(t1, t0, mdp_in, reg); + -+ Label done; ++ Address addr1(mdp_in, constant); ++ Address addr2(t1, 0); ++ Address &addr = addr1; ++ if (reg != noreg) { ++ la(t1, addr1); ++ add(t1, t1, reg); ++ addr = addr2; ++ } + -+ // Avoid calling runtime if count == 0 -+ __ beqz(count, done); ++ if (decrement) { ++ ld(t0, addr); ++ addi(t0, t0, -DataLayout::counter_increment); ++ Label L; ++ bltz(t0, L); // skip store if counter underflow ++ sd(t0, addr); ++ bind(L); ++ } else { ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ ld(t0, addr); ++ addi(t0, t0, DataLayout::counter_increment); ++ Label L; ++ blez(t0, L); // skip store if counter overflow ++ sd(t0, addr); ++ bind(L); ++ } ++} + -+ // Is GC active? 
-+ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); -+ assert_different_registers(src, dst, count, t0); ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int flags_offset = in_bytes(DataLayout::flags_offset()); ++ // Set the flag ++ lbu(t1, Address(mdp_in, flags_offset)); ++ ori(t1, t1, flag_byte_constant); ++ sb(t1, Address(mdp_in, flags_offset)); ++} + -+ __ lbu(t0, gc_state); -+ if (ShenandoahSATBBarrier && dest_uninitialized) { -+ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED); -+ __ beqz(t0, done); -+ } else { -+ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING); -+ __ beqz(t0, done); -+ } + -+ __ push_reg(saved_regs, sp); -+ if (UseCompressedOops) { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), -+ src, dst, count); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count); -+ } -+ __ pop_reg(saved_regs, sp); -+ __ bind(done); -+ } ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld(t1, Address(mdp_in, offset)); ++ bne(value, t1, not_equal_continue); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); + } +} + -+void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call) { -+ if (ShenandoahSATBBarrier) { -+ satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); -+ } ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(t1, Address(mdp_in, offset_of_disp)); ++ add(mdp_in, mdp_in, t1); ++ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); +} + -+void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call) { -+ // If expand_call is true then we expand the call_VM_leaf macro -+ // directly to skip generating the check by -+ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 
-+ assert(thread == xthread, "must be"); ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ add(t1, mdp_in, reg); ++ ld(t1, Address(t1, offset_of_disp)); ++ add(mdp_in, mdp_in, t1); ++ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++} + -+ Label done; -+ Label runtime; + -+ assert_different_registers(obj, pre_val, tmp, t0); -+ assert(pre_val != noreg && tmp != noreg, "expecting a register"); ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ addi(mdp_in, mdp_in, (unsigned)constant); ++ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++} + -+ Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); -+ Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + -+ // Is marking active? -+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { -+ __ lwu(tmp, in_progress); -+ } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(tmp, in_progress); -+ } -+ __ beqz(tmp, done); ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); + -+ // Do we need to load the previous value? -+ if (obj != noreg) { -+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); -+ } ++ // save/restore across call_VM ++ addi(sp, sp, -2 * wordSize); ++ sd(zr, Address(sp, 0)); ++ sd(return_bci, Address(sp, wordSize)); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ ld(zr, Address(sp, 0)); ++ ld(return_bci, Address(sp, wordSize)); ++ addi(sp, sp, 2 * wordSize); ++} + -+ // Is the previous value null? -+ __ beqz(pre_val, done); ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ // Can we store original value in the thread's buffer? -+ // Is index == 0? -+ // (The index field is typed as size_t.) -+ __ ld(tmp, index); // tmp := *index_adr -+ __ beqz(tmp, runtime); // tmp == 0? If yes, goto runtime ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); + -+ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize -+ __ sd(tmp, index); // *index_adr := tmp -+ __ ld(t0, buffer); -+ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr ++ // We are taking a branch. Increment the taken count. ++ Address data(mdp, in_bytes(JumpData::taken_offset())); ++ ld(bumped_count, data); ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ addi(bumped_count, bumped_count, DataLayout::counter_increment); ++ Label L; ++ // eg: bumped_count=0x7fff ffff ffff ffff + 1 < 0. so we use <= 0; ++ blez(bumped_count, L); // skip store if counter overflow, ++ sd(bumped_count, data); ++ bind(L); ++ // The method data pointer needs to be updated to reflect the new target. 
++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} + -+ // Record the previous value -+ __ sd(pre_val, Address(tmp, 0)); -+ __ j(done); ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ __ bind(runtime); -+ // save the live input values -+ RegSet saved = RegSet::of(pre_val); -+ if (tosca_live) saved += RegSet::of(x10); -+ if (obj != noreg) saved += RegSet::of(obj); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ __ push_reg(saved, sp); ++ // We are taking a branch. Increment the not taken count. ++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); + -+ // Calling the runtime using the regular call_VM_leaf mechanism generates -+ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) -+ // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL. -+ // -+ // If we care generating the pre-barrier without a frame (e.g. in the -+ // intrinsified Reference.get() routine) then ebp might be pointing to -+ // the caller frame and so this check will most likely fail at runtime. -+ // -+ // Expanding the call directly bypasses the generation of the check. -+ // So when we do not have have a full interpreter frame on the stack -+ // expand_call should be passed true. -+ if (expand_call) { -+ assert(pre_val != c_rarg1, "smashed arg"); -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); + } ++} + -+ __ pop_reg(saved, sp); ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ __ bind(done); -+} ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) { -+ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + -+ Label is_null; -+ __ beqz(dst, is_null); -+ resolve_forward_pointer_not_null(masm, dst, tmp); -+ __ bind(is_null); ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } +} + -+// IMPORTANT: This must preserve all registers, even t0 and t1, except those explicitely -+// passed in. -+void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) { -+ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); -+ // The below loads the mark word, checks if the lowest two bits are -+ // set, and if so, clear the lowest two bits and copy the result -+ // to dst. Otherwise it leaves dst alone. -+ // Implementing this is surprisingly awkward. 
I do it here by: -+ // - Inverting the mark word -+ // - Test lowest two bits == 0 -+ // - If so, set the lowest two bits -+ // - Invert the result back, and copy to dst -+ RegSet saved_regs = RegSet::of(t2); -+ bool borrow_reg = (tmp == noreg); -+ if (borrow_reg) { -+ // No free registers available. Make one useful. -+ tmp = t0; -+ if (tmp == dst) { -+ tmp = t1; -+ } -+ saved_regs += RegSet::of(tmp); -+ } ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ assert_different_registers(tmp, dst, t2); -+ __ push_reg(saved_regs, sp); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ Label done; -+ __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); -+ __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 -+ __ andi(t2, tmp, markWord::lock_mask_in_place); -+ __ bnez(t2, done); -+ __ ori(tmp, tmp, markWord::marked_value); -+ __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 -+ __ bind(done); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + -+ __ pop_reg(saved_regs, sp); ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } +} + -+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, -+ Register dst, -+ Address load_addr, -+ DecoratorSet decorators) { -+ assert(ShenandoahLoadRefBarrier, "Should be enabled"); -+ assert(dst != t1 && load_addr.base() != t1, "need t1"); -+ assert_different_registers(load_addr.base(), t0, t1); -+ -+ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); -+ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); -+ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); -+ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); -+ bool is_narrow = UseCompressedOops && !is_native; + -+ Label heap_stable, not_cset; -+ __ enter(); -+ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); -+ __ lbu(t1, gc_state); ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ // Check for heap stability -+ if (is_strong) { -+ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); -+ __ beqz(t1, heap_stable); -+ } else { -+ Label lrb; -+ __ andi(t0, t1, ShenandoahHeap::WEAK_ROOTS); -+ __ bnez(t0, lrb); -+ __ andi(t0, t1, ShenandoahHeap::HAS_FORWARDED); -+ __ beqz(t0, heap_stable); -+ __ bind(lrb); -+ } ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ // use x11 for load address -+ Register result_dst = dst; -+ if (dst == x11) { -+ __ mv(t1, dst); -+ dst = t1; -+ } ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ // We are making a call. Increment the count for null receiver. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ j(skip_receiver_profile); ++ bind(not_null); ++ } + -+ // Save x10 and x11, unless it is an output register -+ RegSet saved_regs = RegSet::of(x10, x11) - result_dst; -+ __ push_reg(saved_regs, sp); -+ __ la(x11, load_addr); -+ __ mv(x10, dst); ++ // Record the receiver type. 
++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); + -+ // Test for in-cset -+ if (is_strong) { -+ __ li(t1, (uint64_t)ShenandoahHeap::in_cset_fast_test_addr()); -+ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -+ __ add(t1, t1, t0); -+ __ lbu(t1, Address(t1)); -+ __ andi(t0, t1, 1); -+ __ beqz(t0, not_cset); -+ } ++ // The method data pointer needs to be updated to reflect the new target. + -+ __ push_call_clobbered_registers(); -+ if (is_strong) { -+ if (is_narrow) { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); -+ } else { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); -+ } -+ } else if (is_weak) { -+ if (is_narrow) { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); -+ } else { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); -+ } -+ } else { -+ assert(is_phantom, "only remaining strength"); -+ assert(!is_narrow, "phantom access cannot be narrow"); -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); + } -+ __ jalr(ra); -+ __ mv(t0, x10); -+ __ pop_call_clobbered_registers(); -+ __ mv(x10, t0); -+ __ bind(not_cset); -+ __ mv(result_dst, x10); -+ __ pop_reg(saved_regs, sp); -+ -+ __ bind(heap_stable); -+ __ leave(); +} + -+void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { -+ if (ShenandoahIUBarrier) { -+ __ push_call_clobbered_registers(); ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. (An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. 
++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } + -+ satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); ++ } else { ++ int non_profiled_offset = -1; ++ if (is_virtual_call) { ++ non_profiled_offset = in_bytes(CounterData::count_offset()); ++ } + -+ __ pop_call_clobbered_registers(); ++ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, ++ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); + } +} + -+// -+// Arguments: -+// -+// Inputs: -+// src: oop location to load from, might be clobbered -+// -+// Output: -+// dst: oop loaded from src location -+// -+// Kill: -+// x30 (tmp reg) -+// -+// Alias: -+// dst: x30 (might use x30 as temporary output register to avoid clobbering src) -+// -+void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, -+ DecoratorSet decorators, -+ BasicType type, -+ Register dst, -+ Address src, -+ Register tmp1, -+ Register tmp_thread) { -+ // 1: non-reference load, no additional barrier is needed -+ if (!is_reference_type(type)) { -+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -+ return; -+ } -+ -+ // 2: load a reference from src location and apply LRB if needed -+ if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { -+ Register result_dst = dst; ++void InterpreterMacroAssembler::record_item_in_profile_helper( ++ Register item, Register mdp, Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, int non_profiled_offset) { ++ int last_row = total_rows - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the item and for null. ++ // Take any of three different outcomes: ++ // 1. found item => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); + -+ // Preserve src location for LRB -+ RegSet saved_regs; -+ if (dst == src.base()) { -+ dst = (src.base() == x28) ? x29 : x28; -+ saved_regs = RegSet::of(dst); -+ __ push_reg(saved_regs, sp); -+ } -+ assert_different_registers(dst, src.base()); ++ // See if the item is item[n]. ++ int item_offset = in_bytes(item_offset_fn(row)); ++ test_mdp_data_at(mdp, item_offset, item, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the item from the CallData.) + -+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ // The item is item[n]. Increment count[n]. ++ int count_offset = in_bytes(item_count_offset_fn(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ j(done); ++ bind(next_test); + -+ load_reference_barrier(masm, dst, src, decorators); ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on item[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (non_profiled_offset >= 0) { ++ beqz(reg2, found_null); ++ // Item did not match any saved item and there is no empty row for it. 
++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, non_profiled_offset); ++ j(done); ++ bind(found_null); ++ } else { ++ bnez(reg2, done); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beqz(reg2, found_null); + -+ if (dst != result_dst) { -+ __ mv(result_dst, dst); -+ dst = result_dst; -+ } ++ // Put all the "Case 3" tests here. ++ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, ++ item_offset_fn, item_count_offset_fn, non_profiled_offset); + -+ if (saved_regs.bits() != 0) { -+ __ pop_reg(saved_regs, sp); ++ // Found a null. Keep searching for a matching item, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); + } -+ } else { -+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + } + -+ // 3: apply keep-alive barrier if needed -+ if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { -+ __ enter(); -+ __ push_call_clobbered_registers(); -+ satb_write_barrier_pre(masm /* masm */, -+ noreg /* obj */, -+ dst /* pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ true /* tosca_live */, -+ true /* expand_call */); -+ __ pop_call_clobbered_registers(); -+ __ leave(); ++ // In the fall-through case, we found no matching item, but we ++ // observed the item[start_row] is NULL. ++ // Fill in the item field and increment the count. ++ int item_offset = in_bytes(item_offset_fn(start_row)); ++ set_mdp_data_at(mdp, item_offset, item); ++ int count_offset = in_bytes(item_count_offset_fn(start_row)); ++ mv(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ j(done); + } +} + -+void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2) { -+ bool on_oop = is_reference_type(type); -+ if (!on_oop) { -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); -+ return; -+ } ++// Example state machine code for three profile rows: ++// # main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) then [ ++// row[0].incr() ++// goto done ++// ] ++// if (row[0].rec != NULL) then [ ++// # inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) then [ ++// row[1].incr() ++// goto done ++// ] ++// if (row[1].rec != NULL) then [ ++// # degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) then [ ++// row[2].incr() ++// goto done ++// ] ++// if (row[2].rec != NULL) then [ ++// count.incr() ++// goto done ++// ] # overflow ++// row[2].init(rec) ++// goto done ++// ] else [ ++// # remember row[1] is empty ++// if (row[2].rec == rec) then [ ++// row[2].incr() ++// goto done ++// ] ++// row[1].init(rec) ++// goto done ++// ] ++// else [ ++// # remember row[0] is empty ++// if (row[1].rec == rec) then [ ++// row[1].incr() ++// goto done ++// ] ++// if (row[2].rec == rec) then [ ++// row[2].incr() ++// goto done ++// ] ++// row[0].init(rec) ++// goto done ++// ] ++// done: + -+ // flatten object address if needed -+ if (dst.offset() == 0) { -+ if (dst.base() != x13) { -+ __ mv(x13, dst.base()); -+ } -+ } else { -+ __ la(x13, dst); -+ } ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; + -+ shenandoah_write_barrier_pre(masm, -+ x13 /* obj */, -+ tmp2 /* 
pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ val != noreg /* tosca_live */, -+ false /* expand_call */); ++ record_klass_in_profile_helper(receiver, mdp, reg2, done, is_virtual_call); + -+ if (val == noreg) { -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); -+ } else { -+ iu_barrier(masm, val, tmp1); -+ // G1 barrier needs uncompressed oop for region cross check. -+ Register new_val = val; -+ if (UseCompressedOops) { -+ new_val = t1; -+ __ mv(new_val, val); -+ } -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); -+ } ++ bind(done); +} + -+void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath) { -+ Label done; -+ // Resolve jobject -+ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); -+ -+ // Check for null. -+ __ beqz(obj, done); ++void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ assert(obj != t1, "need t1"); -+ Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); -+ __ lbu(t1, gc_state); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ // Check for heap in evacuation phase -+ __ andi(t0, t1, ShenandoahHeap::EVACUATION); -+ __ bnez(t0, slowpath); ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + -+ __ bind(done); -+} -+ -+// Special Shenandoah CAS implementation that handles false negatives due -+// to concurrent evacuation. The service is more complex than a -+// traditional CAS operation because the CAS operation is intended to -+// succeed if the reference at addr exactly matches expected or if the -+// reference at addr holds a pointer to a from-space object that has -+// been relocated to the location named by expected. There are two -+// races that must be addressed: -+// a) A parallel thread may mutate the contents of addr so that it points -+// to a different object. In this case, the CAS operation should fail. -+// b) A parallel thread may heal the contents of addr, replacing a -+// from-space pointer held in addr with the to-space pointer -+// representing the new location of the object. -+// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL -+// or it refers to an object that is not being evacuated out of -+// from-space, or it refers to the to-space version of an object that -+// is being evacuated out of from-space. -+// -+// By default the value held in the result register following execution -+// of the generated code sequence is 0 to indicate failure of CAS, -+// non-zero to indicate success. If is_cae, the result is the value most -+// recently fetched from addr rather than a boolean success indicator. -+// -+// Clobbers t0, t1 -+void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, -+ Register addr, -+ Register expected, -+ Register new_val, -+ Assembler::Aqrl acquire, -+ Assembler::Aqrl release, -+ bool is_cae, -+ Register result) { -+ bool is_narrow = UseCompressedOops; -+ Assembler::operand_size size = is_narrow ? 
Assembler::uint32 : Assembler::int64; -+ -+ assert_different_registers(addr, expected, t0, t1); -+ assert_different_registers(addr, new_val, t0, t1); -+ -+ Label retry, success, fail, done; ++ for (uint row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; + -+ __ bind(retry); ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); + -+ // Step1: Try to CAS. -+ __ cmpxchg(addr, expected, new_val, size, acquire, release, /* result */ t1); ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); + -+ // If success, then we are done. -+ __ beq(expected, t1, success); ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ j(profile_continue); ++ bind(next_test); ++ } + -+ // Step2: CAS failed, check the forwared pointer. -+ __ mv(t0, t1); ++ update_mdp_for_ret(return_bci); + -+ if (is_narrow) { -+ __ decode_heap_oop(t0, t0); ++ bind(profile_continue); + } -+ resolve_forward_pointer(masm, t0); -+ -+ __ encode_heap_oop(t0, t0); ++} + -+ // Report failure when the forwarded oop was not expected. -+ __ bne(t0, expected, fail); ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ // Step 3: CAS again using the forwarded oop. -+ __ cmpxchg(addr, t1, new_val, size, acquire, release, /* result */ t0); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ // Retry when failed. -+ __ bne(t0, t1, retry); ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + -+ __ bind(success); -+ if (is_cae) { -+ __ mv(result, expected); -+ } else { -+ __ addi(result, zr, 1); -+ } -+ __ j(done); ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); + -+ __ bind(fail); -+ if (is_cae) { -+ __ mv(result, t0); -+ } else { -+ __ mv(result, zr); ++ bind(profile_continue); + } -+ -+ __ bind(done); +} + -+#undef __ -+ -+#ifdef COMPILER1 -+ -+#define __ ce->masm()-> ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; + -+void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { -+ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); -+ // At this point we know that marking is in progress. -+ // If do_load() is true then we have to emit the -+ // load of the previous value; otherwise it has already -+ // been loaded into _pre_val. -+ __ bind(*stub->entry()); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ assert(stub->pre_val()->is_register(), "Precondition."); ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + -+ Register pre_val_reg = stub->pre_val()->as_register(); ++ // *Decrement* the counter. We expect to see zero or small negatives. 
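++    // (the trailing 'true' argument selects the decrement form of
++    // increment_mdp_data_at rather than an increment)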
++ increment_mdp_data_at(mdp, count_offset, true); + -+ if (stub->do_load()) { -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); ++ bind (profile_continue); + } -+ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); -+ ce->store_parameter(stub->pre_val()->as_register(), 0); -+ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); -+ __ j(*stub->continuation()); +} + -+void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, -+ ShenandoahLoadReferenceBarrierStub* stub) { -+ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); -+ __ bind(*stub->entry()); ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ DecoratorSet decorators = stub->decorators(); -+ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); -+ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); -+ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); -+ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ Register obj = stub->obj()->as_register(); -+ Register res = stub->result()->as_register(); -+ Register addr = stub->addr()->as_pointer_register(); -+ Register tmp1 = stub->tmp1()->as_register(); -+ Register tmp2 = stub->tmp2()->as_register(); ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + -+ assert(res == x10, "result must arrive in x10"); -+ assert_different_registers(tmp1, tmp2, t0); ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); + -+ if (res != obj) { -+ __ mv(res, obj); ++ bind(profile_continue); + } ++} + -+ if (is_strong) { -+ // Check for object in cset. -+ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); -+ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -+ __ add(tmp2, tmp2, tmp1); -+ __ lbu(tmp2, Address(tmp2)); -+ __ beqz(tmp2, *stub->continuation(), true /* is_far */); -+ } ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ ce->store_parameter(res, 0); -+ ce->store_parameter(addr, 1); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ if (is_strong) { -+ if (is_native) { -+ __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin())); -+ } else { -+ __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin())); -+ } -+ } else if (is_weak) { -+ __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin())); -+ } else { -+ assert(is_phantom, "only remaining strength"); -+ __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin())); -+ } ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); + -+ __ j(*stub->continuation()); ++ // The method data pointer needs to be updated. 
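++    // (update_mdp_by_offset advances mdp by the displacement recorded in
++    // the default-case cell)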
++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } +} + -+#undef __ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+#define __ sasm-> ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { -+ __ prologue("shenandoah_pre_barrier", false); ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ mvw(reg2, in_bytes(MultiBranchData::per_case_size())); ++ mvw(t0, in_bytes(MultiBranchData::case_array_offset())); ++ Assembler::mul(index, index, reg2); ++ Assembler::add(index, index, t0); + -+ // arg0 : previous value of memory ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); + -+ BarrierSet* bs = BarrierSet::barrier_set(); ++ // The method data pointer need to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); + -+ const Register pre_val = x10; -+ const Register thread = xthread; -+ const Register tmp = t0; ++ bind(profile_continue); ++ } ++} + -+ Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } + -+ Label done; -+ Label runtime; ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); ++ beqz(x13, L); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } + -+ // Is marking still active? -+ Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); -+ __ lb(tmp, gc_state); -+ __ andi(tmp, tmp, ShenandoahHeap::MARKING); -+ __ beqz(tmp, done); ++ { ++ SkipIfEqual skip(this, &DTraceMethodProbes, false); ++ get_method(c_rarg1); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ xthread, c_rarg1); ++ } + -+ // Can we store original value in the thread's buffer? 
-+ __ ld(tmp, queue_index); -+ __ beqz(tmp, runtime); ++ // RedefineClasses() tracing support for obsolete method entry ++ if (log_is_enabled(Trace, redefine, class, obsolete)) { ++ get_method(c_rarg1); ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), ++ xthread, c_rarg1); ++ } ++} + -+ __ sub(tmp, tmp, wordSize); -+ __ sd(tmp, queue_index); -+ __ ld(t1, buffer); -+ __ add(tmp, tmp, t1); -+ __ load_parameter(0, t1); -+ __ sd(t1, Address(tmp, 0)); -+ __ j(done); + -+ __ bind(runtime); -+ __ push_call_clobbered_registers(); -+ __ load_parameter(0, pre_val); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); -+ __ pop_call_clobbered_registers(); -+ __ bind(done); ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. + -+ __ epilogue(); ++ // template interpreter will leave the result on the top of the stack. ++ push(state); ++ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); ++ beqz(x13, L); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(L); ++ pop(state); ++ } ++ ++ { ++ SkipIfEqual skip(this, &DTraceMethodProbes, false); ++ push(state); ++ get_method(c_rarg1); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ xthread, c_rarg1); ++ pop(state); ++ } +} + -+void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, -+ DecoratorSet decorators) { -+ __ prologue("shenandoah_load_reference_barrier", false); -+ // arg0 : object to be resolved + -+ __ push_call_clobbered_registers(); -+ __ load_parameter(0, x10); -+ __ load_parameter(1, x11); ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. 
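++// The condition is "masked value == 0": the updated counter is stored back,
++// ANDed with the mask, and control transfers to *where only when the result
++// is zero (a far j is used because *where may be out of branch range).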
++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register tmp1, Register tmp2, ++ bool preloaded, Label* where) { ++ Label done; ++ if (!preloaded) { ++ lwu(tmp1, counter_addr); ++ } ++ add(tmp1, tmp1, increment); ++ sw(tmp1, counter_addr); ++ lwu(tmp2, mask); ++ andr(tmp1, tmp1, tmp2); ++ bnez(tmp1, done); ++ j(*where); // offset is too large so we have to use j instead of beqz here ++ bind(done); ++} + -+ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); -+ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); -+ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); -+ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); -+ if (is_strong) { -+ if (is_native) { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); -+ } else { -+ if (UseCompressedOops) { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); -+ } else { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); -+ } -+ } -+ } else if (is_weak) { -+ assert(!is_native, "weak must not be called off-heap"); -+ if (UseCompressedOops) { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); -+ } else { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); -+ } -+ } else { -+ assert(is_phantom, "only remaining strength"); -+ assert(is_native, "phantom must only be called off-heap"); -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_phantom); ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore rbcp & rlocals pointer since these ++ // are callee saved registers and no blocking/ GC can happen ++ // in leaf calls. ++#ifdef ASSERT ++ { ++ Label L; ++ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ beqz(t0, L); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base:" ++ " last_sp != NULL"); ++ bind(L); + } -+ __ jalr(ra); -+ __ mv(t0, x10); -+ __ pop_call_clobbered_registers(); -+ __ mv(x10, t0); ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++} + -+ __ epilogue(); ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. 
++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ beqz(t0, L); ++ stop("InterpreterMacroAssembler::call_VM_base:" ++ " last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++// interpreter specific ++ restore_bcp(); ++ restore_locals(); +} + -+#undef __ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) { ++ assert_different_registers(obj, tmp, t0, mdo_addr.base()); ++ Label update, next, none; + -+#endif // COMPILER1 -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -new file mode 100644 -index 00000000000..a705f497667 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,88 @@ -+/* -+ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ verify_oop(obj); + -+#ifndef CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP ++ bnez(obj, update); ++ orptr(mdo_addr, TypeEntries::null_seen, t0, tmp); ++ j(next); + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "gc/shenandoah/shenandoahBarrierSet.hpp" -+#ifdef COMPILER1 -+class LIR_Assembler; -+class ShenandoahPreBarrierStub; -+class ShenandoahLoadReferenceBarrierStub; -+class StubAssembler; -+#endif -+class StubCodeGenerator; ++ bind(update); ++ load_klass(obj, obj); + -+class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { -+private: ++ ld(t0, mdo_addr); ++ xorr(obj, obj, t0); ++ andi(t0, obj, TypeEntries::type_klass_mask); ++ beqz(t0, next); // klass seen before, nothing to ++ // do. The unknown bit may have been ++ // set already but no need to check. 
+ -+ void satb_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call); -+ void shenandoah_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call); ++ andi(t0, obj, TypeEntries::type_unknown); ++ bnez(t0, next); ++ // already unknown. Nothing to do anymore. + -+ void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); -+ void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); -+ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators); ++ ld(t0, mdo_addr); ++ beqz(t0, none); ++ li(tmp, (u1)TypeEntries::null_seen); ++ beq(t0, tmp, none); ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ ld(t0, mdo_addr); ++ xorr(obj, obj, t0); ++ andi(t0, obj, TypeEntries::type_klass_mask); ++ beqz(t0, next); + -+public: ++ // different than before. Cannot keep accurate profile. ++ orptr(mdo_addr, TypeEntries::type_unknown, t0, tmp); ++ j(next); + -+ void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); ++ bind(none); ++ // first time here. Set profile type. ++ sd(obj, mdo_addr); + -+#ifdef COMPILER1 -+ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); -+ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); -+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); -+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); -+#endif ++ bind(next); ++} + -+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs); ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } + -+ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread); -+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2); ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; + -+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath); ++ test_method_data_pointer(mdp, profile_continue); + -+ void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); -+}; ++ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + -+#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -new file mode 100644 -index 00000000000..6c855f23c2a ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -@@ -0,0 +1,285 @@ -+// -+// Copyright (c) 2018, Red Hat, Inc. All rights reserved. 
-+// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. -+// -+// -+ -+source_hpp %{ -+#include "gc/shenandoah/shenandoahBarrierSet.hpp" -+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -+%} -+ -+instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); -+ -+ effect(TEMP tmp, KILL cr); -+ -+ format %{ -+ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapP_shenandoah" -+ %} -+ -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); -+ -+ effect(TEMP tmp, KILL cr); -+ -+ format %{ -+ "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapN_shenandoah" -+ %} -+ -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_reserved(n)); -+ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); -+ -+ effect(TEMP tmp, KILL cr); -+ -+ format %{ -+ "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapPAcq_shenandoah" -+ %} -+ -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
-+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_reserved(n)); -+ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); ++ if (is_virtual) { ++ li(tmp, (u1)DataLayout::virtual_call_type_data_tag); ++ bne(t0, tmp, profile_continue); ++ } else { ++ li(tmp, (u1)DataLayout::call_type_data_tag); ++ bne(t0, tmp, profile_continue); ++ } + -+ effect(TEMP tmp, KILL cr); ++ // calculate slot step ++ static int stack_slot_offset0 = in_bytes(TypeEntriesAtCall::stack_slot_offset(0)); ++ static int slot_step = in_bytes(TypeEntriesAtCall::stack_slot_offset(1)) - stack_slot_offset0; + -+ format %{ -+ "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapNAcq_shenandoah" -+ %} ++ // calculate type step ++ static int argument_type_offset0 = in_bytes(TypeEntriesAtCall::argument_type_offset(0)); ++ static int type_step = in_bytes(TypeEntriesAtCall::argument_type_offset(1)) - argument_type_offset0; + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++ if (MethodData::profile_arguments()) { ++ Label done, loop, loopEnd, profileArgument, profileReturnType; ++ RegSet pushed_registers; ++ pushed_registers += x15; ++ pushed_registers += x16; ++ pushed_registers += x17; ++ Register mdo_addr = x15; ++ Register index = x16; ++ Register off_to_args = x17; ++ push_reg(pushed_registers, sp); + -+ ins_pipe(pipe_slow); -+%} ++ mv(off_to_args, in_bytes(TypeEntriesAtCall::args_data_offset())); ++ mv(t0, TypeProfileArgsLimit); ++ beqz(t0, loopEnd); + -+instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); -+ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ mv(index, zr); // index < TypeProfileArgsLimit ++ bind(loop); ++ bgtz(index, profileReturnType); ++ li(t0, (int)MethodData::profile_return()); ++ beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false ++ bind(profileReturnType); ++ // If return value type is profiled we may have no argument to profile ++ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); ++ mv(t1, - TypeStackSlotEntries::per_arg_count()); ++ mul(t1, index, t1); ++ add(tmp, tmp, t1); ++ li(t1, TypeStackSlotEntries::per_arg_count()); ++ add(t0, mdp, off_to_args); ++ blt(tmp, t1, done); + -+ format %{ -+ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah" -+ %} ++ bind(profileArgument); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
-+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ true /* is_cae */, $res$$Register); -+ %} ++ ld(tmp, Address(callee, Method::const_offset())); ++ load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ li(t0, stack_slot_offset0); ++ li(t1, slot_step); ++ mul(t1, index, t1); ++ add(t0, t0, t1); ++ add(t0, mdp, t0); ++ ld(t0, Address(t0)); ++ sub(tmp, tmp, t0); ++ addi(tmp, tmp, -1); ++ Address arg_addr = argument_address(tmp); ++ ld(tmp, arg_addr); + -+ ins_pipe(pipe_slow); -+%} ++ li(t0, argument_type_offset0); ++ li(t1, type_step); ++ mul(t1, index, t1); ++ add(t0, t0, t1); ++ add(mdo_addr, mdp, t0); ++ Address mdo_arg_addr(mdo_addr, 0); ++ profile_obj_type(tmp, mdo_arg_addr, t1); + -+instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ addi(off_to_args, off_to_args, to_add); + -+ effect(TEMP_DEF res, TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah" -+ %} ++ // increment index by 1 ++ addi(index, index, 1); ++ li(t1, TypeProfileArgsLimit); ++ blt(index, t1, loop); ++ bind(loopEnd); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ true /* is_cae */, $res$$Register); -+ %} ++ if (MethodData::profile_return()) { ++ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); ++ addi(tmp, tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); ++ } + -+ ins_pipe(pipe_slow); -+%} ++ add(t0, mdp, off_to_args); ++ bind(done); ++ mv(mdp, t0); + -+instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ // unspill the clobbered registers ++ pop_reg(pushed_registers, sp); + -+ effect(TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah" -+ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" -+ %} ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size)); ++ } ++ sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one + -+ ins_pipe(pipe_slow); -+%} ++ bind(profile_continue); ++ } ++} + -+instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_reserved(n)); -+ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, xbcp, t0, t1); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; + -+ effect(TEMP_DEF res, TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq_shenandoah" -+ %} ++ test_method_data_pointer(mdp, profile_continue); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ true /* is_cae */, $res$$Register); -+ %} ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + -+ ins_pipe(pipe_slow); -+%} ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. 
We can't go back to the ++ // begining of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't known its ++ // length ++ Label do_profile; ++ lbu(t0, Address(xbcp, 0)); ++ li(tmp, (u1)Bytecodes::_invokedynamic); ++ beq(t0, tmp, do_profile); ++ li(tmp, (u1)Bytecodes::_invokehandle); ++ beq(t0, tmp, do_profile); ++ get_method(tmp); ++ lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); ++ li(t1, vmIntrinsics::_compiledLambdaForm); ++ bne(t0, t1, profile_continue); ++ bind(do_profile); ++ } + -+instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_reserved(n)); -+ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ mv(tmp, ret); ++ profile_obj_type(tmp, mdo_ret_addr, t1); + -+ effect(TEMP_DEF res, TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq_shenandoah" -+ %} ++ bind(profile_continue); ++ } ++} + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ true /* is_cae */, $res$$Register); -+ %} ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3) { ++ assert_different_registers(t0, t1, mdp, tmp1, tmp2, tmp3); ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; + -+ ins_pipe(pipe_slow); -+%} ++ test_method_data_pointer(mdp, profile_continue); + -+instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ lwu(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()))); ++ srli(tmp2, tmp1, 31); ++ bnez(tmp2, profile_continue); // i.e. sign bit set + -+ effect(TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah" -+ %} ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. ++ // mdo start + parameters offset + array length - 1 ++ add(mdp, mdp, tmp1); ++ ld(tmp1, Address(mdp, ArrayData::array_len_offset())); ++ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
-+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++ Label loop; ++ bind(loop); + -+ ins_pipe(pipe_slow); -+%} ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ int per_arg_scale = exact_log2(DataLayout::cell_size); ++ add(t0, mdp, off_base); ++ add(t1, mdp, type_base); + -+instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_reserved(n)); -+ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ shadd(tmp2, tmp1, t0, tmp2, per_arg_scale); ++ // load offset on the stack from the slot for this parameter ++ ld(tmp2, Address(tmp2, 0)); ++ neg(tmp2, tmp2); + -+ effect(TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapNAcq_shenandoah" -+ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" -+ %} ++ // read the parameter from the local area ++ shadd(tmp2, tmp2, xlocals, tmp2, Interpreter::logStackElementSize); ++ ld(tmp2, Address(tmp2, 0)); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++ // profile the parameter ++ shadd(t1, tmp1, t1, t0, per_arg_scale); ++ Address arg_type(t1, 0); ++ profile_obj_type(tmp2, arg_type, tmp3); + -+ ins_pipe(pipe_slow); -+%} ++ // go to next parameter ++ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); ++ bgez(tmp1, loop); + -+instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_reserved(n)); -+ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ bind(profile_continue); ++ } ++} + -+ effect(TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapPAcq_shenandoah" -+ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" -+ %} ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld(mcs, Address(method, Method::method_counters_offset())); ++ bnez(mcs, has_counters); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld(mcs, Address(method, Method::method_counters_offset())); ++ beqz(mcs, skip); // No MethodCounters allocated, OutOfMemory ++ bind(has_counters); ++} + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
-+ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++#ifdef ASSERT ++void InterpreterMacroAssembler::verify_access_flags(Register access_flags, uint32_t flag_bits, ++ const char* msg, bool stop_by_hit) { ++ Label L; ++ andi(t0, access_flags, flag_bits); ++ if (stop_by_hit) { ++ beqz(t0, L); ++ } else { ++ bnez(t0, L); ++ } ++ stop(msg); ++ bind(L); ++} + -+ ins_pipe(pipe_slow); -+%} -diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp ++void InterpreterMacroAssembler::verify_frame_setup() { ++ Label L; ++ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ld(t0, monitor_block_top); ++ beq(esp, t0, L); ++ stop("broken stack frame setup in interpreter"); ++ bind(L); ++} ++#endif +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp new file mode 100644 -index 00000000000..3d3f4d4d774 +index 0000000000..4126e8ee70 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,441 @@ ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +@@ -0,0 +1,283 @@ +/* -+ * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -18106,430 +17798,573 @@ index 00000000000..3d3f4d4d774 + * + */ + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "code/codeBlob.hpp" -+#include "code/vmreg.inline.hpp" -+#include "gc/z/zBarrier.inline.hpp" -+#include "gc/z/zBarrierSet.hpp" -+#include "gc/z/zBarrierSetAssembler.hpp" -+#include "gc/z/zBarrierSetRuntime.hpp" -+#include "gc/z/zThreadLocalData.hpp" -+#include "memory/resourceArea.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "utilities/macros.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_LIRAssembler.hpp" -+#include "c1/c1_MacroAssembler.hpp" -+#include "gc/z/c1/zBarrierSetC1.hpp" -+#endif // COMPILER1 -+#ifdef COMPILER2 -+#include "gc/z/c2/zBarrierSetC2.hpp" -+#endif // COMPILER2 -+ -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#else -+#define BLOCK_COMMENT(str) __ block_comment(str) -+#endif ++#ifndef CPU_RISCV_INTERP_MASM_RISCV_HPP ++#define CPU_RISCV_INTERP_MASM_RISCV_HPP + -+#undef __ -+#define __ masm-> ++#include "asm/macroAssembler.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" + -+void ZBarrierSetAssembler::load_at(MacroAssembler* masm, -+ DecoratorSet decorators, -+ BasicType type, -+ Register dst, -+ Address src, -+ Register tmp1, -+ Register tmp_thread) { -+ if (!ZBarrierSet::barrier_needed(decorators, type)) { -+ // Barrier not needed -+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -+ return; -+ } ++// This file specializes the assember with interpreter-specific macros + -+ assert_different_registers(t1, src.base()); -+ assert_different_registers(t0, t1, dst); ++typedef ByteSize (*OffsetFunction)(uint); + -+ Label done; ++class InterpreterMacroAssembler: public MacroAssembler { ++ protected: ++ // Interpreter specific version of call_VM_base ++ using MacroAssembler::call_VM_leaf_base; + -+ // Load bad mask into temp register. -+ __ la(t0, src); -+ __ ld(t1, address_bad_mask_from_thread(xthread)); -+ __ ld(dst, Address(t0)); ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); + -+ // Test reference against bad mask. If mask bad, then we need to fix it up. -+ __ andr(t1, dst, t1); -+ __ beqz(t1, done); ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); + -+ __ enter(); ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, ++ bool generate_poll = false, Register Rs = t0); + -+ __ push_call_clobbered_registers_except(RegSet::of(dst)); ++ public: ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {} ++ virtual ~InterpreterMacroAssembler() {} + -+ if (c_rarg0 != dst) { -+ __ mv(c_rarg0, dst); -+ } ++ void load_earlyret_value(TosState state); + -+ __ mv(c_rarg1, t0); ++ void jump_to_entry(address entry); + -+ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); + -+ // Make sure dst has the return value. 
-+ if (dst != x10) { -+ __ mv(dst, x10); ++ // Interpreter-specific registers ++ void save_bcp() { ++ sd(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); + } + -+ __ pop_call_clobbered_registers_except(RegSet::of(dst)); -+ __ leave(); ++ void restore_bcp() { ++ ld(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); ++ } + -+ __ bind(done); -+} ++ void restore_locals() { ++ ld(xlocals, Address(fp, frame::interpreter_frame_locals_offset * wordSize)); ++ } + -+#ifdef ASSERT ++ void restore_constant_pool_cache() { ++ ld(xcpool, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); ++ } + -+void ZBarrierSetAssembler::store_at(MacroAssembler* masm, -+ DecoratorSet decorators, -+ BasicType type, -+ Address dst, -+ Register val, -+ Register tmp1, -+ Register tmp2) { -+ // Verify value -+ if (is_reference_type(type)) { -+ // Note that src could be noreg, which means we -+ // are storing null and can skip verification. -+ if (val != noreg) { -+ Label done; ++ void get_dispatch(); + -+ // tmp1 and tmp2 are often set to noreg. -+ RegSet savedRegs = RegSet::of(t0); -+ __ push_reg(savedRegs, sp); ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld(reg, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++ } + -+ __ ld(t0, address_bad_mask_from_thread(xthread)); -+ __ andr(t0, val, t0); -+ __ beqz(t0, done); -+ __ stop("Verify oop store failed"); -+ __ should_not_reach_here(); -+ __ bind(done); -+ __ pop_reg(savedRegs, sp); -+ } ++ void get_const(Register reg) { ++ get_method(reg); ++ ld(reg, Address(reg, in_bytes(Method::const_offset()))); + } + -+ // Store value -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); -+} ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld(reg, Address(reg, in_bytes(ConstMethod::constants_offset()))); ++ } + -+#endif // ASSERT ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); ++ } + -+void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, -+ DecoratorSet decorators, -+ bool is_oop, -+ Register src, -+ Register dst, -+ Register count, -+ RegSet saved_regs) { -+ if (!is_oop) { -+ // Barrier not needed -+ return; ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); + } + -+ BLOCK_COMMENT("ZBarrierSetAssembler::arraycopy_prologue {"); ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // Load cpool->resolved_references(index). ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp = x15); + -+ assert_different_registers(src, count, t0); ++ // Load cpool->resolved_klass_at(index). 
++ void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); + -+ __ push_reg(saved_regs, sp); ++ void pop_ptr(Register r = x10); ++ void pop_i(Register r = x10); ++ void pop_l(Register r = x10); ++ void pop_f(FloatRegister r = f10); ++ void pop_d(FloatRegister r = f10); ++ void push_ptr(Register r = x10); ++ void push_i(Register r = x10); ++ void push_l(Register r = x10); ++ void push_f(FloatRegister r = f10); ++ void push_d(FloatRegister r = f10); + -+ if (count == c_rarg0 && src == c_rarg1) { -+ // exactly backwards!! -+ __ xorr(c_rarg0, c_rarg0, c_rarg1); -+ __ xorr(c_rarg1, c_rarg0, c_rarg1); -+ __ xorr(c_rarg0, c_rarg0, c_rarg1); -+ } else { -+ __ mv(c_rarg0, src); -+ __ mv(c_rarg1, count); ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld(esp, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); ++ // NULL last_sp until next java call ++ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + } + -+ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_array_addr(), 2); ++ // Helpers for swap and dup ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); + -+ __ pop_reg(saved_regs, sp); ++ // Load float value from 'address'. The value is loaded onto the FPU register v0. ++ void load_float(Address src); ++ void load_double(Address src); + -+ BLOCK_COMMENT("} ZBarrierSetAssembler::arraycopy_prologue"); -+} ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); + -+void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, -+ Register jni_env, -+ Register robj, -+ Register tmp, -+ Label& slowpath) { -+ BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_jobject_in_native {"); ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ // dispatch via t0 ++ void dispatch_only(TosState state, bool generate_poll = false, Register Rs = t0); ++ // dispatch normal table via t0 (assume t0 is loaded already) ++ void dispatch_only_normal(TosState state, Register Rs = t0); ++ void dispatch_only_noverify(TosState state, Register Rs = t0); ++ // load t0 from [xbcp + step] and dispatch via t0 ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ // load t0 from [xbcp] and dispatch via t0 and table ++ void dispatch_via (TosState state, address* table); + -+ assert_different_registers(jni_env, robj, tmp); ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method); + -+ // Resolve jobject -+ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, robj, tmp, slowpath); + -+ // Compute the offset of address bad mask from the field of jni_environment -+ long int bad_mask_relative_offset = (long int) (in_bytes(ZThreadLocalData::address_bad_mask_offset()) - -+ in_bytes(JavaThread::jni_environment_offset())); ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. 
++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. ++ void remove_activation(TosState state, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); + -+ // Load the address bad mask -+ __ ld(tmp, Address(jni_env, bad_mask_relative_offset)); ++ // FIXME: Give us a valid frame at a null check. ++ virtual void null_check(Register reg, int offset = -1) { ++ MacroAssembler::null_check(reg, offset); ++ } + -+ // Check address bad mask -+ __ andr(tmp, robj, tmp); -+ __ bnez(tmp, slowpath); ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); + -+ BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native"); -+} ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); + -+#ifdef COMPILER2 ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register tmp1, Register tmp2, ++ bool preloaded, Label* where); + -+OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { -+ if (!OptoReg::is_reg(opto_reg)) { -+ return OptoReg::Bad; -+ } ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); + -+ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); -+ if (vm_reg->is_FloatRegister()) { -+ return opto_reg & ~1; -+ } ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, ++ Label& done, bool is_virtual_call); ++ void record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset); + -+ return opto_reg; -+} ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); + -+#undef __ -+#define __ _masm-> ++ // narrow int return value ++ void narrow(Register result); + -+class ZSaveLiveRegisters { -+private: -+ MacroAssembler* const _masm; -+ RegSet _gp_regs; -+ FloatRegSet _fp_regs; -+ VectorRegSet _vp_regs; ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register t1, ++ bool receiver_can_be_null = false); ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void 
profile_typecheck(Register mdp, Register klass, Register temp); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register temp); + -+public: -+ void initialize(ZLoadBarrierStubC2* stub) { -+ // Record registers that needs to be saved/restored -+ RegMaskIterator rmi(stub->live()); -+ while (rmi.has_next()) { -+ const OptoReg::Name opto_reg = rmi.next(); -+ if (OptoReg::is_reg(opto_reg)) { -+ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); -+ if (vm_reg->is_Register()) { -+ _gp_regs += RegSet::of(vm_reg->as_Register()); -+ } else if (vm_reg->is_FloatRegister()) { -+ _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); -+ } else if (vm_reg->is_VectorRegister()) { -+ const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~(VectorRegisterImpl::max_slots_per_register - 1)); -+ _vp_regs += VectorRegSet::of(vm_reg_base->as_VectorRegister()); -+ } else { -+ fatal("Unknown register type"); -+ } -+ } -+ } ++ void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3); + -+ // Remove C-ABI SOE registers, tmp regs and _ref register that will be updated -+ _gp_regs -= RegSet::range(x18, x27) + RegSet::of(x2) + RegSet::of(x8, x9) + RegSet::of(x5, stub->ref()); -+ } ++ // Debugging ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); + -+ ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : -+ _masm(masm), -+ _gp_regs(), -+ _fp_regs(), -+ _vp_regs() { -+ // Figure out what registers to save/restore -+ initialize(stub); ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; + -+ // Save registers -+ __ push_reg(_gp_regs, sp); -+ __ push_fp(_fp_regs, sp); -+ __ push_vp(_vp_regs, sp); -+ } ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); + -+ ~ZSaveLiveRegisters() { -+ // Restore registers -+ __ pop_vp(_vp_regs, sp); -+ __ pop_fp(_fp_regs, sp); -+ __ pop_reg(_gp_regs, sp); ++ virtual void _call_Unimplemented(address call_site) { ++ save_bcp(); ++ set_last_Java_frame(esp, fp, (address) pc(), t0); ++ MacroAssembler::_call_Unimplemented(call_site); + } ++ ++#ifdef ASSERT ++ void verify_access_flags(Register access_flags, uint32_t flag_bits, ++ const char* msg, bool stop_by_hit = true); ++ void verify_frame_setup(); ++#endif +}; + -+class ZSetupArguments { -+private: -+ MacroAssembler* const _masm; -+ const Register _ref; -+ const Address _ref_addr; ++#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +new file mode 100644 +index 0000000000..776b078723 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +@@ -0,0 +1,295 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+public: -+ ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : -+ _masm(masm), -+ _ref(stub->ref()), -+ _ref_addr(stub->ref_addr()) { -+ -+ // Setup arguments -+ if (_ref_addr.base() == noreg) { -+ // No self healing -+ if (_ref != c_rarg0) { -+ __ mv(c_rarg0, _ref); -+ } -+ __ mv(c_rarg1, zr); -+ } else { -+ // Self healing -+ if (_ref == c_rarg0) { -+ // _ref is already at correct place -+ __ la(c_rarg1, _ref_addr); -+ } else if (_ref != c_rarg1) { -+ // _ref is in wrong place, but not in c_rarg1, so fix it first -+ __ la(c_rarg1, _ref_addr); -+ __ mv(c_rarg0, _ref); -+ } else if (_ref_addr.base() != c_rarg0) { -+ assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0"); -+ __ mv(c_rarg0, _ref); -+ __ la(c_rarg1, _ref_addr); -+ } else { -+ assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0"); -+ if (_ref_addr.base() == c_rarg0) { -+ __ mv(t1, c_rarg1); -+ __ la(c_rarg1, _ref_addr); -+ __ mv(c_rarg0, t1); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+ } ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++// Implementation of SignatureHandlerGenerator ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return xlocals; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return sp; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return t0; } ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::next_gpr() { ++ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { ++ return g_INTArgReg[++_num_reg_int_args]; + } ++ return noreg; ++} + -+ ~ZSetupArguments() { -+ // Transfer result -+ if (_ref != x10) { -+ __ mv(_ref, x10); -+ } ++FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() { ++ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { ++ return g_FPArgReg[_num_reg_fp_args++]; ++ } else { ++ return fnoreg; + } -+}; ++} + -+#undef __ -+#define __ masm-> ++int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() { ++ int ret = _stack_offset; ++ _stack_offset += wordSize; ++ return ret; ++} + -+void 
ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { -+ BLOCK_COMMENT("ZLoadBarrierStubC2"); ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); // allocate on resourse area by default ++ _num_reg_int_args = (method->is_static() ? 1 : 0); ++ _num_reg_fp_args = 0; ++ _stack_offset = 0; ++} + -+ // Stub entry -+ __ bind(*stub->entry()); ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + -+ { -+ ZSaveLiveRegisters save_live_registers(masm, stub); -+ ZSetupArguments setup_arguments(masm, stub); -+ int32_t offset = 0; -+ __ la_patchable(t0, stub->slow_path(), offset); -+ __ jalr(x1, t0, offset); ++ Register reg = next_gpr(); ++ if (reg != noreg) { ++ __ lw(reg, src); ++ } else { ++ __ lw(x10, src); ++ __ sw(x10, Address(to(), next_stack_offset())); + } -+ -+ // Stub exit -+ __ j(*stub->continuation()); +} + -+#undef __ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); + -+#endif // COMPILER2 ++ Register reg = next_gpr(); ++ if (reg != noreg) { ++ __ ld(reg, src); ++ } else { ++ __ ld(x10, src); ++ __ sd(x10, Address(to(), next_stack_offset())); ++ } ++} + -+#ifdef COMPILER1 -+#undef __ -+#define __ ce->masm()-> ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + -+void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, -+ LIR_Opr ref) const { -+ assert_different_registers(xthread, ref->as_register(), t1); -+ __ ld(t1, address_bad_mask_from_thread(xthread)); -+ __ andr(t1, t1, ref->as_register()); ++ FloatRegister reg = next_fpr(); ++ if (reg != fnoreg) { ++ __ flw(reg, src); ++ } else { ++ // a floating-point argument is passed according to the integer calling ++ // convention if no floating-point argument register available ++ pass_int(); ++ } +} + -+void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce, -+ ZLoadBarrierStubC1* stub) const { -+ // Stub entry -+ __ bind(*stub->entry()); ++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); + -+ Register ref = stub->ref()->as_register(); -+ Register ref_addr = noreg; -+ Register tmp = noreg; ++ FloatRegister reg = next_fpr(); ++ if (reg != fnoreg) { ++ __ fld(reg, src); ++ } else { ++ // a floating-point argument is passed according to the integer calling ++ // convention if no floating-point argument register available ++ pass_long(); ++ } ++} + -+ if (stub->tmp()->is_valid()) { -+ // Load address into tmp register -+ ce->leal(stub->ref_addr(), stub->tmp()); -+ ref_addr = tmp = stub->tmp()->as_pointer_register(); ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ Register reg = next_gpr(); ++ if (reg == c_rarg1) { ++ assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); ++ __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); ++ } else if (reg != noreg) { ++ // c_rarg2-c_rarg7 ++ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ mv(reg, zr); //_num_reg_int_args:c_rarg -> 1:c_rarg2, 2:c_rarg3... 
++ __ ld(temp(), x10); ++ Label L; ++ __ beqz(temp(), L); ++ __ mv(reg, x10); ++ __ bind(L); + } else { -+ // Address already in register -+ ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register(); ++ //to stack ++ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ ld(temp(), x10); ++ Label L; ++ __ bnez(temp(), L); ++ __ mv(x10, zr); ++ __ bind(L); ++ assert(sizeof(jobject) == wordSize, ""); ++ __ sd(x10, Address(to(), next_stack_offset())); + } ++} + -+ assert_different_registers(ref, ref_addr, noreg); ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); + -+ // Save x10 unless it is the result or tmp register -+ // Set up SP to accomodate parameters and maybe x10. -+ if (ref != x10 && tmp != x10) { -+ __ sub(sp, sp, 32); -+ __ sd(x10, Address(sp, 16)); -+ } else { -+ __ sub(sp, sp, 16); ++ // return result handler ++ __ la(x10, ExternalAddress(Interpreter::result_handler(method()->result_type()))); ++ __ ret(); ++ ++ __ flush(); ++} ++ ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _int_args; ++ intptr_t* _fp_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_reg_int_args; ++ unsigned int _num_reg_fp_args; ++ ++ intptr_t* single_slot_addr() { ++ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ return from_addr; + } + -+ // Setup arguments and call runtime stub -+ ce->store_parameter(ref_addr, 1); -+ ce->store_parameter(ref, 0); ++ intptr_t* double_slot_addr() { ++ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ return from_addr; ++ } + -+ __ far_call(stub->runtime_stub()); ++ int pass_gpr(intptr_t value) { ++ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { ++ *_int_args++ = value; ++ return _num_reg_int_args++; ++ } ++ return -1; ++ } + -+ // Verify result -+ __ verify_oop(x10, "Bad oop"); ++ int pass_fpr(intptr_t value) { ++ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { ++ *_fp_args++ = value; ++ return _num_reg_fp_args++; ++ } ++ return -1; ++ } + ++ void pass_stack(intptr_t value) { ++ *_to++ = value; ++ } + -+ // Move result into place -+ if (ref != x10) { -+ __ mv(ref, x10); ++ virtual void pass_int() { ++ jint value = *(jint*)single_slot_addr(); ++ if (pass_gpr(value) < 0) { ++ pass_stack(value); ++ } + } + -+ // Restore x10 unless it is the result or tmp register -+ if (ref != x10 && tmp != x10) { -+ __ ld(x10, Address(sp, 16)); -+ __ add(sp, sp, 32); -+ } else { -+ __ add(sp, sp, 16); ++ virtual void pass_long() { ++ intptr_t value = *double_slot_addr(); ++ if (pass_gpr(value) < 0) { ++ pass_stack(value); ++ } + } + -+ // Stub exit -+ __ j(*stub->continuation()); -+} ++ virtual void pass_object() { ++ intptr_t* addr = single_slot_addr(); ++ intptr_t value = *addr == 0 ? 
NULL : (intptr_t)addr; ++ if (pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } + -+#undef __ -+#define __ sasm-> ++ virtual void pass_float() { ++ jint value = *(jint*) single_slot_addr(); ++ // a floating-point argument is passed according to the integer calling ++ // convention if no floating-point argument register available ++ if (pass_fpr(value) < 0 && pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } ++ ++ virtual void pass_double() { ++ intptr_t value = *double_slot_addr(); ++ int arg = pass_fpr(value); ++ if (0 <= arg) { ++ *_fp_identifiers |= (1ull << arg); // mark as double ++ } else if (pass_gpr(value) < 0) { // no need to mark if passing by integer registers or stack ++ pass_stack(value); ++ } ++ } + -+void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, -+ DecoratorSet decorators) const { -+ __ prologue("zgc_load_barrier stub", false); ++ public: ++ SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; + -+ __ push_call_clobbered_registers_except(RegSet::of(x10)); ++ _int_args = to - (method->is_static() ? 16 : 17); ++ _fp_args = to - 8; ++ _fp_identifiers = to - 9; ++ *(int*) _fp_identifiers = 0; ++ _num_reg_int_args = (method->is_static() ? 1 : 0); ++ _num_reg_fp_args = 0; ++ } + -+ // Setup arguments -+ __ load_parameter(0, c_rarg0); -+ __ load_parameter(1, c_rarg1); ++ ~SlowSignatureHandler() ++ { ++ _from = NULL; ++ _to = NULL; ++ _int_args = NULL; ++ _fp_args = NULL; ++ _fp_identifiers = NULL; ++ } ++}; + -+ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); + -+ __ pop_call_clobbered_registers_except(RegSet::of(x10)); ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); + -+ __ epilogue(); -+} ++ // handle arguments ++ SlowSignatureHandler ssh(m, (address)from, to); ++ ssh.iterate(UCONST64(-1)); + -+#undef __ -+#endif // COMPILER1 -diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp new file mode 100644 -index 00000000000..dc07ab635fe +index 0000000000..05df63ba2a --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,101 @@ ++++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp +@@ -0,0 +1,68 @@ +/* -+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -18553,91 +18388,57 @@ index 00000000000..dc07ab635fe + * + */ + -+#ifndef CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP ++#ifndef CPU_RISCV_INTERPRETERRT_RISCV_HPP ++#define CPU_RISCV_INTERPRETERRT_RISCV_HPP + -+#include "code/vmreg.hpp" -+#include "oops/accessDecorators.hpp" -+#ifdef COMPILER2 -+#include "opto/optoreg.hpp" -+#endif // COMPILER2 -+ -+#ifdef COMPILER1 -+class LIR_Assembler; -+class LIR_Opr; -+class StubAssembler; -+class ZLoadBarrierStubC1; -+#endif // COMPILER1 -+ -+#ifdef COMPILER2 -+class Node; -+class ZLoadBarrierStubC2; -+#endif // COMPILER2 -+ -+class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { -+public: -+ virtual void load_at(MacroAssembler* masm, -+ DecoratorSet decorators, -+ BasicType type, -+ Register dst, -+ Address src, -+ Register tmp1, -+ Register tmp_thread); -+ -+#ifdef ASSERT -+ virtual void store_at(MacroAssembler* masm, -+ DecoratorSet decorators, -+ BasicType type, -+ Address dst, -+ Register val, -+ Register tmp1, -+ Register tmp2); -+#endif // ASSERT ++// This is included in the middle of class Interpreter. ++// Do not include files here. + -+ virtual void arraycopy_prologue(MacroAssembler* masm, -+ DecoratorSet decorators, -+ bool is_oop, -+ Register src, -+ Register dst, -+ Register count, -+ RegSet saved_regs); ++// native method calls + -+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, -+ Register jni_env, -+ Register robj, -+ Register tmp, -+ Label& slowpath); ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ unsigned int _num_reg_fp_args; ++ unsigned int _num_reg_int_args; ++ int _stack_offset; + -+#ifdef COMPILER1 -+ void generate_c1_load_barrier_test(LIR_Assembler* ce, -+ LIR_Opr ref) const; ++ void pass_int(); ++ void pass_long(); ++ void pass_float(); ++ void pass_double(); ++ void pass_object(); + -+ void generate_c1_load_barrier_stub(LIR_Assembler* ce, -+ ZLoadBarrierStubC1* stub) const; ++ Register next_gpr(); ++ FloatRegister next_fpr(); ++ int next_stack_offset(); + -+ void generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, -+ DecoratorSet decorators) const; -+#endif // COMPILER1 ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ virtual ~SignatureHandlerGenerator() { ++ _masm = NULL; ++ } + -+#ifdef COMPILER2 -+ OptoReg::Name refine_register(const Node* node, -+ OptoReg::Name opto_reg); ++ // Code generation ++ void generate(uint64_t fingerprint); + -+ void generate_c2_load_barrier_stub(MacroAssembler* masm, -+ ZLoadBarrierStubC2* stub) const; -+#endif // COMPILER2 ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); +}; + -+#endif // CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp ++#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp new file mode 100644 -index 00000000000..d14997790af +index 0000000000..5a0c9b812f --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp -@@ -0,0 +1,212 @@ ++++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp +@@ -0,0 +1,89 @@ +/* -+ * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
++ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -18660,201 +18461,79 @@ index 00000000000..d14997790af + * + */ + -+#include "precompiled.hpp" -+#include "gc/shared/gcLogPrecious.hpp" -+#include "gc/shared/gc_globals.hpp" -+#include "gc/z/zGlobals.hpp" -+#include "runtime/globals.hpp" -+#include "runtime/os.hpp" -+#include "utilities/globalDefinitions.hpp" -+#include "utilities/powerOfTwo.hpp" ++#ifndef CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP ++#define CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP + -+#ifdef LINUX -+#include -+#endif // LINUX ++private: + -+// -+// The heap can have three different layouts, depending on the max heap size. -+// -+// Address Space & Pointer Layout 1 -+// -------------------------------- -+// -+// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) -+// . . -+// . . -+// . . -+// +--------------------------------+ 0x0000014000000000 (20TB) -+// | Remapped View | -+// +--------------------------------+ 0x0000010000000000 (16TB) -+// . . -+// +--------------------------------+ 0x00000c0000000000 (12TB) -+// | Marked1 View | -+// +--------------------------------+ 0x0000080000000000 (8TB) -+// | Marked0 View | -+// +--------------------------------+ 0x0000040000000000 (4TB) -+// . . -+// +--------------------------------+ 0x0000000000000000 -+// -+// 6 4 4 4 4 -+// 3 6 5 2 1 0 -+// +--------------------+----+-----------------------------------------------+ -+// |00000000 00000000 00|1111|11 11111111 11111111 11111111 11111111 11111111| -+// +--------------------+----+-----------------------------------------------+ -+// | | | -+// | | * 41-0 Object Offset (42-bits, 4TB address space) -+// | | -+// | * 45-42 Metadata Bits (4-bits) 0001 = Marked0 (Address view 4-8TB) -+// | 0010 = Marked1 (Address view 8-12TB) -+// | 0100 = Remapped (Address view 16-20TB) -+// | 1000 = Finalizable (Address view N/A) -+// | -+// * 63-46 Fixed (18-bits, always zero) -+// -+// -+// Address Space & Pointer Layout 2 -+// -------------------------------- -+// -+// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) -+// . . -+// . . -+// . . -+// +--------------------------------+ 0x0000280000000000 (40TB) -+// | Remapped View | -+// +--------------------------------+ 0x0000200000000000 (32TB) -+// . . -+// +--------------------------------+ 0x0000180000000000 (24TB) -+// | Marked1 View | -+// +--------------------------------+ 0x0000100000000000 (16TB) -+// | Marked0 View | -+// +--------------------------------+ 0x0000080000000000 (8TB) -+// . . 
-+// +--------------------------------+ 0x0000000000000000 -+// -+// 6 4 4 4 4 -+// 3 7 6 3 2 0 -+// +------------------+-----+------------------------------------------------+ -+// |00000000 00000000 0|1111|111 11111111 11111111 11111111 11111111 11111111| -+// +-------------------+----+------------------------------------------------+ -+// | | | -+// | | * 42-0 Object Offset (43-bits, 8TB address space) -+// | | -+// | * 46-43 Metadata Bits (4-bits) 0001 = Marked0 (Address view 8-16TB) -+// | 0010 = Marked1 (Address view 16-24TB) -+// | 0100 = Remapped (Address view 32-40TB) -+// | 1000 = Finalizable (Address view N/A) -+// | -+// * 63-47 Fixed (17-bits, always zero) -+// -+// -+// Address Space & Pointer Layout 3 -+// -------------------------------- -+// -+// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) -+// . . -+// . . -+// . . -+// +--------------------------------+ 0x0000500000000000 (80TB) -+// | Remapped View | -+// +--------------------------------+ 0x0000400000000000 (64TB) -+// . . -+// +--------------------------------+ 0x0000300000000000 (48TB) -+// | Marked1 View | -+// +--------------------------------+ 0x0000200000000000 (32TB) -+// | Marked0 View | -+// +--------------------------------+ 0x0000100000000000 (16TB) -+// . . -+// +--------------------------------+ 0x0000000000000000 -+// -+// 6 4 4 4 4 -+// 3 8 7 4 3 0 -+// +------------------+----+-------------------------------------------------+ -+// |00000000 00000000 |1111|1111 11111111 11111111 11111111 11111111 11111111| -+// +------------------+----+-------------------------------------------------+ -+// | | | -+// | | * 43-0 Object Offset (44-bits, 16TB address space) -+// | | -+// | * 47-44 Metadata Bits (4-bits) 0001 = Marked0 (Address view 16-32TB) -+// | 0010 = Marked1 (Address view 32-48TB) -+// | 0100 = Remapped (Address view 64-80TB) -+// | 1000 = Finalizable (Address view N/A) -+// | -+// * 63-48 Fixed (16-bits, always zero) -+// ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + -+// Default value if probing is not implemented for a certain platform: 128TB -+static const size_t DEFAULT_MAX_ADDRESS_BIT = 47; -+// Minimum value returned, if probing fails: 64GB -+static const size_t MINIMUM_MAX_ADDRESS_BIT = 36; -+ -+static size_t probe_valid_max_address_bit() { -+#ifdef LINUX -+ size_t max_address_bit = 0; -+ const size_t page_size = os::vm_page_size(); -+ for (size_t i = DEFAULT_MAX_ADDRESS_BIT; i > MINIMUM_MAX_ADDRESS_BIT; --i) { -+ const uintptr_t base_addr = ((uintptr_t) 1U) << i; -+ if (msync((void*)base_addr, page_size, MS_ASYNC) == 0) { -+ // msync suceeded, the address is valid, and maybe even already mapped. -+ max_address_bit = i; -+ break; -+ } -+ if (errno != ENOMEM) { -+ // Some error occured. This should never happen, but msync -+ // has some undefined behavior, hence ignore this bit. -+#ifdef ASSERT -+ fatal("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); -+#else // ASSERT -+ log_warning_p(gc)("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); -+#endif // ASSERT -+ continue; -+ } -+ // Since msync failed with ENOMEM, the page might not be mapped. -+ // Try to map it, to see if the address is valid. 
-+ void* const result_addr = mmap((void*) base_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); -+ if (result_addr != MAP_FAILED) { -+ munmap(result_addr, page_size); -+ } -+ if ((uintptr_t) result_addr == base_addr) { -+ // address is valid -+ max_address_bit = i; -+ break; -+ } ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ OrderAccess::release(); ++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; + } -+ if (max_address_bit == 0) { -+ // probing failed, allocate a very high page and take that bit as the maximum -+ const uintptr_t high_addr = ((uintptr_t) 1U) << DEFAULT_MAX_ADDRESS_BIT; -+ void* const result_addr = mmap((void*) high_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); -+ if (result_addr != MAP_FAILED) { -+ max_address_bit = BitsPerSize_t - count_leading_zeros((size_t) result_addr) - 1; -+ munmap(result_addr, page_size); ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ assert(src != NULL, "Src should not be NULL."); ++ if (_last_Java_sp != src->_last_Java_sp) { ++ _last_Java_sp = NULL; ++ OrderAccess::release(); + } ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; + } -+ log_info_p(gc, init)("Probing address space for the highest valid bit: " SIZE_FORMAT, max_address_bit); -+ return MAX2(max_address_bit, MINIMUM_MAX_ADDRESS_BIT); -+#else // LINUX -+ return DEFAULT_MAX_ADDRESS_BIT; -+#endif // LINUX -+} + -+size_t ZPlatformAddressOffsetBits() { -+ const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1; -+ const size_t max_address_offset_bits = valid_max_address_offset_bits - 3; -+ const size_t min_address_offset_bits = max_address_offset_bits - 2; -+ const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio); -+ const size_t address_offset_bits = log2i_exact(address_offset); -+ return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); -+} ++ bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; } ++ void make_walkable(JavaThread* thread); ++ void capture_last_Java_pc(void); + -+size_t ZPlatformAddressMetadataShift() { -+ return ZPlatformAddressOffsetBits(); -+} -diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ const address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void 
set_last_Java_fp(intptr_t* fp) { OrderAccess::release(); _last_Java_fp = fp; } ++ ++#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp new file mode 100644 -index 00000000000..f20ecd9b073 +index 0000000000..f6e7351c4f --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp -@@ -0,0 +1,36 @@ ++++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +@@ -0,0 +1,194 @@ +/* -+ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -18878,265 +18557,182 @@ index 00000000000..f20ecd9b073 + * + */ + -+#ifndef CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP -+#define CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP -+ -+const size_t ZPlatformGranuleSizeShift = 21; // 2MB -+const size_t ZPlatformHeapViews = 3; -+const size_t ZPlatformCacheLineSize = 64; -+ -+size_t ZPlatformAddressOffsetBits(); -+size_t ZPlatformAddressMetadataShift(); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "runtime/safepoint.hpp" + -+#endif // CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad -new file mode 100644 -index 00000000000..6b6f87814a5 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad -@@ -0,0 +1,233 @@ -+// -+// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. -+// ++#define __ masm-> + -+source_hpp %{ ++#define BUFFER_SIZE 30*wordSize + -+#include "gc/shared/gc_globals.hpp" -+#include "gc/z/c2/zBarrierSetC2.hpp" -+#include "gc/z/zThreadLocalData.hpp" ++// Instead of issuing a LoadLoad barrier we create an address ++// dependency between loads; this might be more efficient. 
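A standalone C++ sketch of the protocol that comment describes may help; it is not part of the patch, and the names safepoint_counter, slow_path and fast_get_int are assumptions made for illustration. In the generated stub the ordering guarantee comes from the hardware address dependency created by XOR-ing the object pointer with the counter twice; plain C++ gives no such guarantee, so the sketch only shows the shape of the fast path.

#include <cstdint>

// Assumed stand-ins for the real runtime state; illustrative names only.
static volatile uint32_t safepoint_counter = 0;   // even value = no safepoint in progress
static int slow_path() { return -1; }             // placeholder for the JNI slow case

// Seqlock-style fast read of an int field at 'offset' inside 'obj'.
static int fast_get_int(char* obj, intptr_t offset) {
  uint32_t c = safepoint_counter;                  // first load of the counter
  if (c & 1) {
    return slow_path();                            // odd counter: safepoint in progress
  }
  // The stub computes obj ^ c ^ c, which equals obj but makes the field address
  // depend on the counter, so the field load cannot be reordered before the
  // counter load on weakly ordered hardware.
  char* robj = (char*)((uintptr_t)obj ^ c ^ c);
  int value = *(int*)(robj + offset);              // speculative field load
  if (safepoint_counter != c) {
    return slow_path();                            // counter moved: value may be stale
  }
  return value;
}

The same re-check against the counter is what the stub performs with rcounter before moving the result into x10/f10 and returning.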
+ -+%} ++// Common register usage: ++// x10/f10: result ++// c_rarg0: jni env ++// c_rarg1: obj ++// c_rarg2: jfield id + -+source %{ ++static const Register robj = x13; ++static const Register rcounter = x14; ++static const Register roffset = x15; ++static const Register rcounter_addr = x16; ++static const Register result = x17; + -+static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, int barrier_data) { -+ if (barrier_data == ZLoadBarrierElided) { -+ return; ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ name = NULL; // unreachable + } -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data); -+ __ ld(tmp, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(tmp, tmp, ref); -+ __ bnez(tmp, *stub->entry(), true /* far */); -+ __ bind(*stub->continuation()); -+} -+ -+static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, ZLoadBarrierStrong); -+ __ j(*stub->entry()); -+ __ bind(*stub->continuation()); -+} -+ -+%} -+ -+// Load Pointer -+instruct zLoadP(iRegPNoSp dst, memory mem) -+%{ -+ match(Set dst (LoadP mem)); -+ predicate(UseZGC && (n->as_Load()->barrier_data() != 0)); -+ effect(TEMP dst); -+ -+ ins_cost(4 * DEFAULT_COST); -+ -+ format %{ "ld $dst, $mem, #@zLoadP" %} -+ -+ ins_encode %{ -+ const Address ref_addr (as_Register($mem$$base), $mem$$disp); -+ __ ld($dst$$Register, ref_addr); -+ z_load_barrier(_masm, this, ref_addr, $dst$$Register, t0 /* tmp */, barrier_data()); -+ %} -+ -+ ins_pipe(iload_reg_mem); -+%} -+ -+instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(KILL cr, TEMP_DEF res); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapP\n\t" -+ "mv $res, $res == $oldval" %} ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); + -+ ins_encode %{ -+ Label failed; -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result_as_bool */); -+ __ beqz($res$$Register, failed); -+ __ mv(t0, $oldval$$Register); -+ __ bind(failed); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ld(t1, Address(xthread, 
ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); -+ __ andr(t1, t1, t0); -+ __ beqz(t1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result_as_bool */); -+ __ bind(good); -+ } -+ %} ++ Label slow; ++ int32_t offset = 0; ++ __ la_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset); ++ __ addi(rcounter_addr, rcounter_addr, offset); + -+ ins_pipe(pipe_slow); -+%} ++ Address safepoint_counter_addr(rcounter_addr, 0); ++ __ lwu(rcounter, safepoint_counter_addr); ++ // An even value means there are no ongoing safepoint operations ++ __ andi(t0, rcounter, 1); ++ __ bnez(t0, slow); ++ __ xorr(robj, c_rarg1, rcounter); ++ __ xorr(robj, robj, rcounter); // obj, since ++ // robj ^ rcounter ^ rcounter == robj ++ // robj is address dependent on rcounter. + -+instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); -+ effect(KILL cr, TEMP_DEF res); ++ // Both robj and t0 are clobbered by try_resolve_jobject_in_native. ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ assert_cond(bs != NULL); ++ bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow); + -+ ins_cost(2 * VOLATILE_REF_COST); ++ __ srli(roffset, c_rarg2, 2); // offset + -+ format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapPAcq\n\t" -+ "mv $res, $res == $oldval" %} ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); // Used by the segfault handler ++ __ add(roffset, robj, roffset); + -+ ins_encode %{ -+ Label failed; -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result_as_bool */); -+ __ beqz($res$$Register, failed); -+ __ mv(t0, $oldval$$Register); -+ __ bind(failed); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); -+ __ andr(t1, t1, t0); -+ __ beqz(t1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result_as_bool */); -+ __ bind(good); ++ switch (type) { ++ case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break; ++ case T_BYTE: __ lb(result, Address(roffset, 0)); break; ++ case T_CHAR: __ lhu(result, Address(roffset, 0)); break; ++ case T_SHORT: __ lh(result, Address(roffset, 0)); break; ++ case T_INT: __ lw(result, Address(roffset, 0)); break; ++ case T_LONG: __ ld(result, Address(roffset, 0)); break; ++ case T_FLOAT: { ++ __ flw(f28, Address(roffset, 0)); // f28 as temporaries ++ __ fmv_x_w(result, f28); // f{31--0}-->x ++ break; + } -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP 
oldval, iRegP newval) %{ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -+ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP_DEF res); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangeP" %} -+ -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(t0, t0, $res$$Register); -+ __ beqz(t0, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); -+ __ bind(good); ++ case T_DOUBLE: { ++ __ fld(f28, Address(roffset, 0)); // f28 as temporaries ++ __ fmv_x_d(result, f28); // d{63--0}-->x ++ break; + } -+ %} -+ -+ ins_pipe(pipe_slow); -+%} ++ default: ShouldNotReachHere(); ++ } + -+instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -+ predicate(UseZGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP_DEF res); ++ __ xorr(rcounter_addr, rcounter_addr, result); ++ __ xorr(rcounter_addr, rcounter_addr, result); ++ __ lw(t0, safepoint_counter_addr); ++ __ bne(rcounter, t0, slow); + -+ ins_cost(2 * VOLATILE_REF_COST); ++ switch (type) { ++ case T_FLOAT: __ fmv_w_x(f10, result); break; ++ case T_DOUBLE: __ fmv_d_x(f10, result); break; ++ default: __ mv(x10, result); break; ++ } ++ __ ret(); + -+ format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangePAcq" %} ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind(slow); ++ address slow_case_addr; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ slow_case_addr = NULL; // unreachable ++ } + -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(t0, t0, $res$$Register); -+ __ beqz(t0, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); -+ __ 
bind(good); -+ } -+ %} ++ { ++ __ enter(); ++ int32_t tmp_offset = 0; ++ __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset); ++ __ jalr(x1, t0, tmp_offset); ++ __ leave(); ++ __ ret(); ++ } ++ __ flush(); + -+ ins_pipe(pipe_slow); -+%} ++ return fast_entry; ++} + -+instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -+ match(Set prev (GetAndSetP mem newv)); -+ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); -+ effect(TEMP_DEF prev, KILL cr); + -+ ins_cost(2 * VOLATILE_REF_COST); ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} + -+ format %{ "atomic_xchg $prev, $newv, [$mem], #@zGetAndSetP" %} ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} + -+ ins_encode %{ -+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); -+ %} ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} + -+ ins_pipe(pipe_serial); -+%} ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} + -+instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -+ match(Set prev (GetAndSetP mem newv)); -+ predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() != 0)); -+ effect(TEMP_DEF prev, KILL cr); ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} + -+ ins_cost(VOLATILE_REF_COST); ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} + -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem], #@zGetAndSetPAcq" %} ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} + -+ ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); -+ %} -+ ins_pipe(pipe_serial); -+%} -diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp new file mode 100644 -index 00000000000..2936837d951 +index 0000000000..df3c0267ee --- /dev/null -+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -0,0 +1,52 @@ ++++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +@@ -0,0 +1,106 @@ +/* -+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -19160,40 +18756,96 @@ index 00000000000..2936837d951 + * + */ + -+#ifndef CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP -+#define CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP ++#ifndef CPU_RISCV_JNITYPES_RISCV_HPP ++#define CPU_RISCV_JNITYPES_RISCV_HPP + -+const int StackAlignmentInBytes = 16; ++#include "jni.h" ++#include "oops/oop.hpp" + -+// Indicates whether the C calling conventions require that -+// 32-bit integer argument values are extended to 64 bits. -+const bool CCallingConventionRequiresIntsAsLongs = false; ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call + -+// RISCV has adopted a multicopy atomic model closely following -+// that of ARMv8. -+#define CPU_MULTI_COPY_ATOMIC ++class JNITypes : private AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls:calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). + -+// To be safe, we deoptimize when we come across an access that needs -+// patching. This is similar to what is done on aarch64. -+#define DEOPTIMIZE_WHEN_PATCHING ++public: ++ // Ints are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } + -+#define SUPPORTS_NATIVE_CX8 ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to+1). ++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ } + -+#define SUPPORT_RESERVED_STACK_AREA ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ pos += 2; ++ } + -+#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ pos += 2; ++ } + -+#define USE_POINTERS_TO_REGISTER_IMPL_ARRAY ++ // Oops are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + -+#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++ ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 1 ++ // Doubles are stored in native word format in one JavaCallArgument ++ // slot at *(to+1). 
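The slot convention being described: a jint, jfloat or oop occupies one JavaCallArgument slot at *to, while a jlong or jdouble takes two slots with its payload written to the second one (to + 1 + pos, hence _JNI_SLOT_OFFSET == 1), which is what lets the call stub's reversal of the argument list produce the right ordering. The put_double overloads that follow use exactly the same pattern as put_long above. A minimal standalone sketch, using plain int32_t/int64_t as stand-ins for jint/jlong (illustrative only, not part of the header):

#include <cstdint>
#include <cstdio>

// Mirrors the put_int / put_long slot layout of this header with plain types.
static void put_int(int32_t from, intptr_t* to, int& pos)  { *(int32_t*)(to + pos++) = from; }
static void put_long(int64_t from, intptr_t* to, int& pos) { *(int64_t*)(to + 1 + pos) = from; pos += 2; }

int main() {
  intptr_t slots[4] = {0, 0, 0, 0};
  int pos = 0;
  put_int(7, slots, pos);                       // payload in slots[0], pos becomes 1
  put_long(0x1122334455667788LL, slots, pos);   // payload in slots[2], slots[1] unused, pos becomes 3
  std::printf("pos=%d slots[0]=%ld slots[2]=%lx\n",
              pos, (long)slots[0], (unsigned long)slots[2]);
  return 0;
}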
++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ } ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ // No need to worry about alignment on Intel. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_RISCV_JNITYPES_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp new file mode 100644 -index 00000000000..cbfc0583883 +index 0000000000..e18bd3d8e2 --- /dev/null -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -0,0 +1,99 @@ ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -0,0 +1,5410 @@ +/* -+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -19217,7334 +18869,6924 @@ index 00000000000..cbfc0583883 + * + */ + -+#ifndef CPU_RISCV_GLOBALS_RISCV_HPP -+#define CPU_RISCV_GLOBALS_RISCV_HPP -+ -+#include "utilities/globalDefinitions.hpp" -+#include "utilities/macros.hpp" -+ -+// Sets the default values for platform dependent flags used by the runtime system. -+// (see globals.hpp) -+ -+define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks -+define_pd_global(bool, TrapBasedNullChecks, false); -+define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast -+ -+define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. -+define_pd_global(intx, CodeEntryAlignment, 64); -+define_pd_global(intx, OptoLoopAlignment, 16); -+ -+#define DEFAULT_STACK_YELLOW_PAGES (2) -+#define DEFAULT_STACK_RED_PAGES (1) -+// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the -+// stack if compiled for unix and LP64. To pass stack overflow tests we need -+// 20 shadow pages. 
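Rough arithmetic behind that requirement, assuming the common 4 KiB base page size (an assumption for illustration; the page size is not fixed by this file): 20 shadow pages give 80 KiB of shadow zone, comfortably more than the 64 KiB buffer socketWrite0 places on the stack. A throwaway compile-time check of the same sum:

#include <cstddef>

// Illustrative only: the 4 KiB page size is an assumption, not taken from the patch.
constexpr std::size_t kAssumedPageSize   = 4 * 1024;
constexpr std::size_t kShadowPages       = 20;        // DEFAULT_STACK_SHADOW_PAGES just below
constexpr std::size_t kSocketWriteBuffer = 64 * 1024; // buffer mentioned in the comment above

static_assert(kShadowPages * kAssumedPageSize > kSocketWriteBuffer,
              "shadow zone should exceed the JNI socket-write stack buffer");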
-+#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+5)) -+#define DEFAULT_STACK_RESERVED_PAGES (1) -+ -+#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES -+#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES -+#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES -+#define MIN_STACK_RESERVED_PAGES (0) ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "compiler/disassembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/accessDecorators.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/klass.inline.hpp" ++#include "oops/oop.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/jniHandles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.hpp" ++#ifdef COMPILER2 ++#include "opto/compile.hpp" ++#include "opto/intrinsicnode.hpp" ++#include "opto/node.hpp" ++#include "opto/output.hpp" ++#endif + -+define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); -+define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); -+define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); -+define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#endif ++#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") + -+define_pd_global(bool, RewriteBytecodes, true); -+define_pd_global(bool, RewriteFrequentPairs, true); ++static void pass_arg0(MacroAssembler* masm, Register arg) { ++ if (c_rarg0 != arg) { ++ assert_cond(masm != NULL); ++ masm->mv(c_rarg0, arg); ++ } ++} + -+define_pd_global(bool, PreserveFramePointer, false); ++static void pass_arg1(MacroAssembler* masm, Register arg) { ++ if (c_rarg1 != arg) { ++ assert_cond(masm != NULL); ++ masm->mv(c_rarg1, arg); ++ } ++} + -+define_pd_global(uintx, TypeProfileLevel, 111); ++static void pass_arg2(MacroAssembler* masm, Register arg) { ++ if (c_rarg2 != arg) { ++ assert_cond(masm != NULL); ++ masm->mv(c_rarg2, arg); ++ } ++} + -+define_pd_global(bool, CompactStrings, true); ++static void pass_arg3(MacroAssembler* masm, Register arg) { ++ if (c_rarg3 != arg) { ++ assert_cond(masm != NULL); ++ masm->mv(c_rarg3, arg); ++ } ++} + -+// Clear short arrays bigger than one word in an arch-specific way -+define_pd_global(intx, InitArrayShortSize, BytesPerLong); ++void MacroAssembler::align(int modulus, int extra_offset) { ++ CompressibleRegion cr(this); ++ while ((offset() + extra_offset) % modulus != 0) { nop(); } ++} + -+define_pd_global(intx, InlineSmallCode, 1000); ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); ++} + -+#define ARCH_FLAGS(develop, \ -+ product, \ -+ notproduct, \ -+ range, \ -+ constraint) \ -+ \ -+ product(bool, NearCpool, true, \ -+ "constant pool is close to instructions") \ -+ product(intx, BlockZeroingLowLimit, 256, \ -+ "Minimum size in bytes when block zeroing will be 
used") \ -+ range(1, max_jint) \ -+ product(bool, TraceTraps, false, "Trace all traps the signal handler") \ -+ /* For now we're going to be safe and add the I/O bits to userspace fences. */ \ -+ product(bool, UseConservativeFence, true, \ -+ "Extend i for r and o for w in the pred/succ flags of fence;" \ -+ "Extend fence.i to fence.i + fence.") \ -+ product(bool, AvoidUnalignedAccesses, true, \ -+ "Avoid generating unaligned memory accesses") \ -+ product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \ -+ product(bool, UseRVB, false, EXPERIMENTAL, "Use RVB instructions") \ -+ product(bool, UseRVC, false, EXPERIMENTAL, "Use RVC instructions") \ -+ product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \ -+ "Use RVV instructions for left/right shift of BigInteger") ++// Implementation of call_VM versions + -+#endif // CPU_RISCV_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp -new file mode 100644 -index 00000000000..cc93103dc55 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp -@@ -0,0 +1,79 @@ -+/* -+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "code/icBuffer.hpp" -+#include "gc/shared/collectedHeap.inline.hpp" -+#include "interpreter/bytecodes.hpp" -+#include "memory/resourceArea.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/oop.inline.hpp" ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ pass_arg1(this, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} + -+int InlineCacheBuffer::ic_stub_code_size() { -+ // 6: auipc + ld + auipc + jalr + address(2 * instruction_size) -+ // 5: auipc + ld + j + address(2 * instruction_size) -+ return (MacroAssembler::far_branches() ? 
6 : 5) * NativeInstruction::instruction_size; ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ pass_arg1(this, arg_1); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); +} + -+#define __ masm-> ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ assert(arg_1 != c_rarg3, "smashed arg"); ++ assert(arg_2 != c_rarg3, "smashed arg"); ++ pass_arg3(this, arg_3); + -+void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { -+ assert_cond(code_begin != NULL && entry_point != NULL); -+ ResourceMark rm; -+ CodeBuffer code(code_begin, ic_stub_code_size()); -+ MacroAssembler* masm = new MacroAssembler(&code); -+ // Note: even though the code contains an embedded value, we do not need reloc info -+ // because -+ // (1) the value is old (i.e., doesn't matter for scavenges) -+ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); + -+ address start = __ pc(); -+ Label l; -+ __ ld(t1, l); -+ __ far_jump(ExternalAddress(entry_point)); -+ __ align(wordSize); -+ __ bind(l); -+ __ emit_int64((intptr_t)cached_value); -+ // Only need to invalidate the 1st two instructions - not the whole ic stub -+ ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size()); -+ assert(__ pc() - start == ic_stub_code_size(), "must be"); ++ pass_arg1(this, arg_1); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); +} + -+address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { -+ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object -+ NativeJump* jump = nativeJump_at(move->next_instruction_address()); -+ return jump->jump_destination(); ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); +} + -+ -+void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { -+ // The word containing the cached value is at the end of this IC buffer -+ uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize); -+ void* o = (void*)*p; -+ return o; ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ pass_arg1(this, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); +} -diff --git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp -new file mode 100644 -index 00000000000..922a80f9f3e ---- /dev/null -+++ b/src/hotspot/cpu/riscv/icache_riscv.cpp -@@ -0,0 +1,51 @@ -+/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "runtime/icache.hpp" + -+#define __ _masm-> ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { + -+static int icache_flush(address addr, int lines, int magic) { -+ os::icache_flush((long int) addr, (long int) (addr + (lines << ICache::log2_line_size))); -+ return magic; ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ pass_arg1(this, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); +} + -+void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { -+ address start = (address)icache_flush; -+ *flush_icache_stub = (ICache::flush_icache_stub_t)start; -+ -+ // ICache::invalidate_range() contains explicit condition that the first -+ // call is invoked on the generated icache flush stub code range. -+ ICache::invalidate_range(start, 0); -+ -+ { -+ StubCodeMark mark(this, "ICache", "fake_stub_for_inlined_icache_flush"); -+ __ ret(); -+ } ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ assert(arg_1 != c_rarg3, "smashed arg"); ++ assert(arg_2 != c_rarg3, "smashed arg"); ++ pass_arg3(this, arg_3); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ pass_arg1(this, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); +} + -+#undef __ -diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp -new file mode 100644 -index 00000000000..5bf40ca8204 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/icache_riscv.hpp -@@ -0,0 +1,42 @@ -+/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). 
-+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++// these are no-ops overridden by InterpreterMacroAssembler ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} ++void MacroAssembler::check_and_handle_popframe(Register java_thread) {} + -+#ifndef CPU_RISCV_ICACHE_RISCV_HPP -+#define CPU_RISCV_ICACHE_RISCV_HPP ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); + -+// Interface for updating the instruction cache. Whenever the VM -+// modifies code, part of the processor instruction cache potentially -+// has to be flushed. ++ // load indirectly to solve generation ordering problem ++ ld(tmp, ExternalAddress((address) delayed_value_addr)); + -+class ICache : public AbstractICache { -+public: -+ enum { -+ stub_size = 16, // Size of the icache flush stub in bytes -+ line_size = BytesPerWord, // conservative -+ log2_line_size = LogBytesPerWord // log2(line_size) -+ }; -+}; ++ if (offset != 0) ++ add(tmp, tmp, offset); + -+#endif // CPU_RISCV_ICACHE_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -new file mode 100644 -index 00000000000..d12dcb2af19 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -0,0 +1,1940 @@ -+/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ return RegisterOrConstant(tmp); ++} + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "interp_masm_riscv.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "logging/log.hpp" -+#include "oops/arrayOop.hpp" -+#include "oops/markWord.hpp" -+#include "oops/method.hpp" -+#include "oops/methodData.hpp" -+#include "prims/jvmtiExport.hpp" -+#include "prims/jvmtiThreadState.hpp" -+#include "runtime/basicLock.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/safepointMechanism.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/thread.inline.hpp" -+#include "utilities/powerOfTwo.hpp" ++// Calls to C land ++// ++// When entering C land, the fp, & esp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Register last_java_pc, ++ Register tmp) { + -+void InterpreterMacroAssembler::narrow(Register result) { -+ // Get method->_constMethod->_result_type -+ ld(t0, Address(fp, frame::interpreter_frame_method_offset * wordSize)); -+ ld(t0, Address(t0, Method::const_offset())); -+ lbu(t0, Address(t0, ConstMethod::result_type_offset())); ++ if (last_java_pc->is_valid()) { ++ sd(last_java_pc, Address(xthread, ++ JavaThread::frame_anchor_offset() + ++ JavaFrameAnchor::last_Java_pc_offset())); ++ } + -+ Label done, notBool, notByte, notChar; ++ // determine last_java_sp register ++ if (last_java_sp == sp) { ++ mv(tmp, sp); ++ last_java_sp = tmp; ++ } else if (!last_java_sp->is_valid()) { ++ last_java_sp = esp; ++ } + -+ // common case first -+ mv(t1, T_INT); -+ beq(t0, t1, done); ++ sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); + -+ // mask integer result to narrower return type. -+ mv(t1, T_BOOLEAN); -+ bne(t0, t1, notBool); ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); ++ } ++} + -+ andi(result, result, 0x1); -+ j(done); ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc, ++ Register tmp) { ++ assert(last_java_pc != NULL, "must provide a valid PC"); + -+ bind(notBool); -+ mv(t1, T_BYTE); -+ bne(t0, t1, notByte); -+ sign_extend(result, result, 8); -+ j(done); ++ la(tmp, last_java_pc); ++ sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + -+ bind(notByte); -+ mv(t1, T_CHAR); -+ bne(t0, t1, notChar); -+ zero_extend(result, result, 16); -+ j(done); ++ set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); ++} + -+ bind(notChar); -+ sign_extend(result, result, 16); ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label &L, ++ Register tmp) { ++ if (L.is_bound()) { ++ set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); ++ } ++} + -+ // Nothing to do for T_INT -+ bind(done); -+ addw(result, result, zr); ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. 
++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. ++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ membar(MacroAssembler::AnyAny); ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ andi(t0, t1, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path); ++ } else { ++ safepoint_poll(slow_path); ++ } +} + -+void InterpreterMacroAssembler::jump_to_entry(address entry) { -+ assert(entry != NULL, "Entry must have been generated by now"); -+ j(entry); ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ // we must set sp to zero to clear frame ++ sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); ++ ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); +} + -+void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { -+ if (JvmtiExport::can_pop_frame()) { -+ Label L; -+ // Initiate popframe handling only if it is not already being -+ // processed. If the flag has the popframe_processing bit set, -+ // it means that this code is called *during* popframe handling - we -+ // don't want to reenter. -+ // This method is only called just after the call into the vm in -+ // call_VM_base, so the arg registers are available. -+ lwu(t1, Address(xthread, JavaThread::popframe_condition_offset())); -+ andi(t0, t1, JavaThread::popframe_pending_bit); -+ beqz(t0, L); -+ andi(t0, t1, JavaThread::popframe_processing_bit); -+ bnez(t0, L); -+ // Call Interpreter::remove_activation_preserving_args_entry() to get the -+ // address of the same-named entrypoint in the generated interpreter code. 
-+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); -+ jr(x10); -+ bind(L); ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++ java_thread = xthread; ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = esp; + } -+} + ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(java_thread == xthread, "unexpected register"); + -+void InterpreterMacroAssembler::load_earlyret_value(TosState state) { -+ ld(x12, Address(xthread, JavaThread::jvmti_thread_state_offset())); -+ const Address tos_addr(x12, JvmtiThreadState::earlyret_tos_offset()); -+ const Address oop_addr(x12, JvmtiThreadState::earlyret_oop_offset()); -+ const Address val_addr(x12, JvmtiThreadState::earlyret_value_offset()); -+ switch (state) { -+ case atos: -+ ld(x10, oop_addr); -+ sd(zr, oop_addr); -+ verify_oop(x10); -+ break; -+ case ltos: -+ ld(x10, val_addr); -+ break; -+ case btos: // fall through -+ case ztos: // fall through -+ case ctos: // fall through -+ case stos: // fall through -+ case itos: -+ lwu(x10, val_addr); -+ break; -+ case ftos: -+ flw(f10, val_addr); -+ break; -+ case dtos: -+ fld(f10, val_addr); -+ break; -+ case vtos: -+ /* nothing to do */ -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ // Clean up tos value in the thread object -+ mvw(t0, (int) ilgl); -+ sw(t0, tos_addr); -+ sw(zr, val_addr); -+} ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); + ++ // push java thread (becomes first argument of C function) ++ mv(c_rarg0, java_thread); + -+void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { -+ if (JvmtiExport::can_force_early_return()) { -+ Label L; -+ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); -+ beqz(t0, L); // if [thread->jvmti_thread_state() == NULL] then exit ++ // set last Java frame before call ++ assert(last_java_sp != fp, "can't use fp"); + -+ // Initiate earlyret handling only if it is not already being processed. -+ // If the flag has the earlyret_processing bit set, it means that this code -+ // is called *during* earlyret handling - we don't want to reenter. -+ lwu(t0, Address(t0, JvmtiThreadState::earlyret_state_offset())); -+ mv(t1, JvmtiThreadState::earlyret_pending); -+ bne(t0, t1, L); ++ Label l; ++ set_last_Java_frame(last_java_sp, fp, l, t0); + -+ // Call Interpreter::remove_activation_early_entry() to get the address of the -+ // same-named entrypoint in the generated interpreter code. 
-+ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); -+ lwu(t0, Address(t0, JvmtiThreadState::earlyret_tos_offset())); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), t0); -+ jr(x10); -+ bind(L); ++ // do the call, remove parameters ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); ++ ++ // reset last Java frame ++ // Only interpreter should have to clear fp ++ reset_last_Java_frame(true); ++ ++ // C++ interp handles this in the interpreter ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); ++ Label ok; ++ beqz(t0, ok); ++ int32_t offset = 0; ++ la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset); ++ jalr(x0, t0, offset); ++ bind(ok); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ get_vm_result(oop_result, java_thread); + } +} + -+void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) { -+ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); -+ lhu(reg, Address(xbcp, bcp_offset)); -+ revb_h(reg, reg); ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ sd(zr, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); +} + -+void InterpreterMacroAssembler::get_dispatch() { -+ int32_t offset = 0; -+ la_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset); -+ addi(xdispatch, xdispatch, offset); ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); +} + -+void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, -+ int bcp_offset, -+ size_t index_size) { -+ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); -+ if (index_size == sizeof(u2)) { -+ load_unsigned_short(index, Address(xbcp, bcp_offset)); -+ } else if (index_size == sizeof(u4)) { -+ lwu(index, Address(xbcp, bcp_offset)); -+ // Check if the secondary index definition is still ~x, otherwise -+ // we have to change the following assembler code to calculate the -+ // plain index. -+ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); -+ xori(index, index, -1); -+ addw(index, index, zr); -+ } else if (index_size == sizeof(u1)) { -+ load_unsigned_byte(index, Address(xbcp, bcp_offset)); -+ } else { -+ ShouldNotReachHere(); ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) { return; } ++ ++ // Pass register number to verify_oop_subroutine ++ const char* b = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); + } -+} ++ BLOCK_COMMENT("verify_oop {"); + -+// Return -+// Rindex: index into constant pool -+// Rcache: address of cache entry - ConstantPoolCache::base_offset() -+// -+// A caller must add ConstantPoolCache::base_offset() to Rcache to get -+// the true address of the cache entry. 
-+// -+void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, -+ Register index, -+ int bcp_offset, -+ size_t index_size) { -+ assert_different_registers(cache, index); -+ assert_different_registers(cache, xcpool); -+ get_cache_index_at_bcp(index, bcp_offset, index_size); -+ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); -+ // Convert from field index to ConstantPoolCacheEntry -+ // riscv already has the cache in xcpool so there is no need to -+ // install it in cache. Instead we pre-add the indexed offset to -+ // xcpool and return it in cache. All clients of this method need to -+ // be modified accordingly. -+ shadd(cache, index, xcpool, cache, 5); -+} ++ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ ++ mv(c_rarg0, reg); // c_rarg0 : x10 ++ // The length of the instruction sequence emitted should be independent ++ // of the values of the local char buffer address so that the size of mach ++ // nodes for scratch emit and normal emit matches. ++ mv(t0, (address)b); + ++ // call indirectly to solve generation ordering problem ++ int32_t offset = 0; ++ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); ++ ld(t1, Address(t1, offset)); ++ jalr(t1); + -+void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, -+ Register index, -+ Register bytecode, -+ int byte_no, -+ int bcp_offset, -+ size_t index_size) { -+ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); -+ // We use a 32-bit load here since the layout of 64-bit words on -+ // little-endian machines allow us that. -+ // n.b. unlike x86 cache already includes the index offset -+ la(bytecode, Address(cache, -+ ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::indices_offset())); -+ membar(MacroAssembler::AnyAny); -+ lwu(bytecode, bytecode); -+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ const int shift_count = (1 + byte_no) * BitsPerByte; -+ slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte)); -+ srli(bytecode, bytecode, XLEN - BitsPerByte); -+} ++ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + -+void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, -+ Register tmp, -+ int bcp_offset, -+ size_t index_size) { -+ assert(cache != tmp, "must use different register"); -+ get_cache_index_at_bcp(tmp, bcp_offset, index_size); -+ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); -+ // Convert from field index to ConstantPoolCacheEntry index -+ // and from word offset to byte offset -+ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, -+ "else change next line"); -+ ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); -+ // skip past the header -+ add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); -+ // construct pointer to cache entry -+ shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord); ++ BLOCK_COMMENT("} verify_oop"); +} + -+// Load object from cpool->resolved_references(index) -+void InterpreterMacroAssembler::load_resolved_reference_at_index( -+ Register result, Register index, Register tmp) { -+ assert_different_registers(result, index); ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) { ++ return; ++ } + -+ get_constant_pool(result); -+ // Load pointer for resolved_references[] objArray -+ ld(result, Address(result, ConstantPool::cache_offset_in_bytes())); -+ ld(result, Address(result, 
ConstantPoolCache::resolved_references_offset_in_bytes())); -+ resolve_oop_handle(result, tmp); -+ // Add in the index -+ addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); -+ shadd(result, index, result, index, LogBytesPerHeapOop); -+ load_heap_oop(result, Address(result, 0)); ++ const char* b = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ } ++ BLOCK_COMMENT("verify_oop_addr {"); ++ ++ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ ++ if (addr.uses(sp)) { ++ la(x10, addr); ++ ld(x10, Address(x10, 4 * wordSize)); ++ } else { ++ ld(x10, addr); ++ } ++ ++ // The length of the instruction sequence emitted should be independent ++ // of the values of the local char buffer address so that the size of mach ++ // nodes for scratch emit and normal emit matches. ++ mv(t0, (address)b); ++ ++ // call indirectly to solve generation ordering problem ++ int32_t offset = 0; ++ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); ++ ld(t1, Address(t1, offset)); ++ jalr(t1); ++ ++ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ ++ BLOCK_COMMENT("} verify_oop_addr"); +} + -+void InterpreterMacroAssembler::load_resolved_klass_at_offset( -+ Register cpool, Register index, Register klass, Register temp) { -+ shadd(temp, index, cpool, temp, LogBytesPerWord); -+ lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index -+ ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses -+ shadd(klass, temp, klass, temp, LogBytesPerWord); -+ ld(klass, Address(klass, Array::base_offset_in_bytes())); ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). ++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ if (arg_slot.is_constant()) { ++ return Address(esp, arg_slot.as_constant() * stackElementSize + offset); ++ } else { ++ assert_different_registers(t0, arg_slot.as_register()); ++ shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize)); ++ return Address(t0, offset); ++ } +} + -+void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, -+ Register method, -+ Register cache) { -+ const int method_offset = in_bytes( -+ ConstantPoolCache::base_offset() + -+ ((byte_no == TemplateTable::f2_byte) -+ ? 
ConstantPoolCacheEntry::f2_offset() -+ : ConstantPoolCacheEntry::f1_offset())); ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif + -+ ld(method, Address(cache, method_offset)); // get f1 Method* ++void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) ++{ ++ // In order to get locks to work, we need to fake a in_VM state ++ if (ShowMessageBoxOnError) { ++ JavaThread* thread = JavaThread::current(); ++ JavaThreadState saved_state = thread->thread_state(); ++ thread->set_thread_state(_thread_in_vm); ++#ifndef PRODUCT ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ ttyLocker ttyl; ++ BytecodeCounter::print(); ++ } ++#endif ++ if (os::message_box(msg, "Execution stopped, print registers?")) { ++ ttyLocker ttyl; ++ tty->print_cr(" pc = 0x%016lx", pc); ++#ifndef PRODUCT ++ tty->cr(); ++ findpc(pc); ++ tty->cr(); ++#endif ++ tty->print_cr(" x0 = 0x%016lx", regs[0]); ++ tty->print_cr(" x1 = 0x%016lx", regs[1]); ++ tty->print_cr(" x2 = 0x%016lx", regs[2]); ++ tty->print_cr(" x3 = 0x%016lx", regs[3]); ++ tty->print_cr(" x4 = 0x%016lx", regs[4]); ++ tty->print_cr(" x5 = 0x%016lx", regs[5]); ++ tty->print_cr(" x6 = 0x%016lx", regs[6]); ++ tty->print_cr(" x7 = 0x%016lx", regs[7]); ++ tty->print_cr(" x8 = 0x%016lx", regs[8]); ++ tty->print_cr(" x9 = 0x%016lx", regs[9]); ++ tty->print_cr("x10 = 0x%016lx", regs[10]); ++ tty->print_cr("x11 = 0x%016lx", regs[11]); ++ tty->print_cr("x12 = 0x%016lx", regs[12]); ++ tty->print_cr("x13 = 0x%016lx", regs[13]); ++ tty->print_cr("x14 = 0x%016lx", regs[14]); ++ tty->print_cr("x15 = 0x%016lx", regs[15]); ++ tty->print_cr("x16 = 0x%016lx", regs[16]); ++ tty->print_cr("x17 = 0x%016lx", regs[17]); ++ tty->print_cr("x18 = 0x%016lx", regs[18]); ++ tty->print_cr("x19 = 0x%016lx", regs[19]); ++ tty->print_cr("x20 = 0x%016lx", regs[20]); ++ tty->print_cr("x21 = 0x%016lx", regs[21]); ++ tty->print_cr("x22 = 0x%016lx", regs[22]); ++ tty->print_cr("x23 = 0x%016lx", regs[23]); ++ tty->print_cr("x24 = 0x%016lx", regs[24]); ++ tty->print_cr("x25 = 0x%016lx", regs[25]); ++ tty->print_cr("x26 = 0x%016lx", regs[26]); ++ tty->print_cr("x27 = 0x%016lx", regs[27]); ++ tty->print_cr("x28 = 0x%016lx", regs[28]); ++ tty->print_cr("x30 = 0x%016lx", regs[30]); ++ tty->print_cr("x31 = 0x%016lx", regs[31]); ++ BREAKPOINT; ++ } ++ } ++ fatal("DEBUG MESSAGE: %s", msg); +} + -+// Generate a subtype check: branch to ok_is_subtype if sub_klass is a -+// subtype of super_klass. -+// -+// Args: -+// x10: superklass -+// Rsub_klass: subklass -+// -+// Kills: -+// x12, x15 -+void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, -+ Label& ok_is_subtype) { -+ assert(Rsub_klass != x10, "x10 holds superklass"); -+ assert(Rsub_klass != x12, "x12 holds 2ndary super array length"); -+ assert(Rsub_klass != x15, "x15 holds 2ndary super array scan ptr"); ++void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) { ++ Label done, not_weak; ++ beqz(value, done); // Use NULL as-is. + -+ // Profile the not-null value's klass. -+ profile_typecheck(x12, Rsub_klass, x15); // blows x12, reloads x15 ++ // Test for jweak tag. ++ andi(t0, value, JNIHandles::weak_tag_mask); ++ beqz(t0, not_weak); + -+ // Do the check. -+ check_klass_subtype(Rsub_klass, x10, x12, ok_is_subtype); // blows x12 ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, ++ Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ j(done); + -+ // Profile the failure of the check. 
-+ profile_typecheck_failed(x12); // blows x12 ++ bind(not_weak); ++ // Resolve (untagged) jobject. ++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); +} + -+// Java Expression Stack ++void MacroAssembler::stop(const char* msg) { ++ address ip = pc(); ++ pusha(); ++ // The length of the instruction sequence emitted should be independent ++ // of the values of msg and ip so that the size of mach nodes for scratch ++ // emit and normal emit matches. ++ mv(c_rarg0, (address)msg); ++ mv(c_rarg1, (address)ip); ++ mv(c_rarg2, sp); ++ mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); ++ jalr(c_rarg3); ++ ebreak(); ++} + -+void InterpreterMacroAssembler::pop_ptr(Register r) { -+ ld(r, Address(esp, 0)); -+ addi(esp, esp, wordSize); ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); +} + -+void InterpreterMacroAssembler::pop_i(Register r) { -+ lw(r, Address(esp, 0)); // lw do signed extended -+ addi(esp, esp, wordSize); ++void MacroAssembler::emit_static_call_stub() { ++ // CompiledDirectStaticCall::set_to_interpreted knows the ++ // exact layout of this stub. ++ ++ mov_metadata(xmethod, (Metadata*)NULL); ++ ++ // Jump to the entry point of the i2c stub. ++ int32_t offset = 0; ++ movptr_with_offset(t0, 0, offset); ++ jalr(x0, t0, offset); +} + -+void InterpreterMacroAssembler::pop_l(Register r) { -+ ld(r, Address(esp, 0)); -+ addi(esp, esp, 2 * Interpreter::stackElementSize); ++void MacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments, ++ Label *retaddr) { ++ int32_t offset = 0; ++ push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp ++ movptr_with_offset(t0, entry_point, offset); ++ jalr(x1, t0, offset); ++ if (retaddr != NULL) { ++ bind(*retaddr); ++ } ++ pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp +} + -+void InterpreterMacroAssembler::push_ptr(Register r) { -+ addi(esp, esp, -wordSize); -+ sd(r, Address(esp, 0)); ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); +} + -+void InterpreterMacroAssembler::push_i(Register r) { -+ addi(esp, esp, -wordSize); -+ addw(r, r, zr); // signed extended -+ sd(r, Address(esp, 0)); ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ pass_arg0(this, arg_0); ++ call_VM_leaf_base(entry_point, 1); +} + -+void InterpreterMacroAssembler::push_l(Register r) { -+ addi(esp, esp, -2 * wordSize); -+ sd(zr, Address(esp, wordSize)); -+ sd(r, Address(esp)); ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ pass_arg0(this, arg_0); ++ pass_arg1(this, arg_1); ++ call_VM_leaf_base(entry_point, 2); +} + -+void InterpreterMacroAssembler::pop_f(FloatRegister r) { -+ flw(r, esp, 0); -+ addi(esp, esp, wordSize); ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, ++ Register arg_1, Register arg_2) { ++ pass_arg0(this, arg_0); ++ pass_arg1(this, arg_1); ++ pass_arg2(this, arg_2); ++ call_VM_leaf_base(entry_point, 3); +} + -+void InterpreterMacroAssembler::pop_d(FloatRegister r) { -+ fld(r, esp, 0); -+ addi(esp, esp, 2 * Interpreter::stackElementSize); ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { ++ pass_arg0(this, arg_0); ++ 
MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + -+void InterpreterMacroAssembler::push_f(FloatRegister r) { -+ addi(esp, esp, -wordSize); -+ fsw(r, Address(esp, 0)); ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); +} + -+void InterpreterMacroAssembler::push_d(FloatRegister r) { -+ addi(esp, esp, -2 * wordSize); -+ fsd(r, Address(esp, 0)); ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ assert(arg_0 != c_rarg2, "smashed arg"); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); +} + -+void InterpreterMacroAssembler::pop(TosState state) { -+ switch (state) { -+ case atos: -+ pop_ptr(); -+ verify_oop(x10); -+ break; -+ case btos: // fall through -+ case ztos: // fall through -+ case ctos: // fall through -+ case stos: // fall through -+ case itos: -+ pop_i(); -+ break; -+ case ltos: -+ pop_l(); -+ break; -+ case ftos: -+ pop_f(); -+ break; -+ case dtos: -+ pop_d(); -+ break; -+ case vtos: -+ /* nothing to do */ -+ break; -+ default: -+ ShouldNotReachHere(); -+ } ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { ++ assert(arg_0 != c_rarg3, "smashed arg"); ++ assert(arg_1 != c_rarg3, "smashed arg"); ++ assert(arg_2 != c_rarg3, "smashed arg"); ++ pass_arg3(this, arg_3); ++ assert(arg_0 != c_rarg2, "smashed arg"); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 4); +} + -+void InterpreterMacroAssembler::push(TosState state) { -+ switch (state) { -+ case atos: -+ verify_oop(x10); -+ push_ptr(); -+ break; -+ case btos: // fall through -+ case ztos: // fall through -+ case ctos: // fall through -+ case stos: // fall through -+ case itos: -+ push_i(); -+ break; -+ case ltos: -+ push_l(); -+ break; -+ case ftos: -+ push_f(); -+ break; -+ case dtos: -+ push_d(); -+ break; -+ case vtos: -+ /* nothing to do */ -+ break; -+ default: -+ ShouldNotReachHere(); ++void MacroAssembler::nop() { ++ addi(x0, x0, 0); ++} ++ ++void MacroAssembler::mv(Register Rd, Register Rs) { ++ if (Rd != Rs) { ++ addi(Rd, Rs, 0); + } +} + -+// Helpers for swap and dup -+void InterpreterMacroAssembler::load_ptr(int n, Register val) { -+ ld(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); ++void MacroAssembler::notr(Register Rd, Register Rs) { ++ xori(Rd, Rs, -1); +} + -+void InterpreterMacroAssembler::store_ptr(int n, Register val) { -+ sd(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); ++void MacroAssembler::neg(Register Rd, Register Rs) { ++ sub(Rd, x0, Rs); +} + -+void InterpreterMacroAssembler::load_float(Address src) { -+ flw(f10, src); ++void MacroAssembler::negw(Register Rd, Register Rs) { ++ subw(Rd, x0, Rs); +} + -+void InterpreterMacroAssembler::load_double(Address src) { -+ fld(f10, src); ++void MacroAssembler::sext_w(Register Rd, Register Rs) { ++ addiw(Rd, Rs, 0); +} + -+void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { -+ // set sender sp -+ mv(x30, sp); -+ // record last_sp -+ sd(esp, 
Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++void MacroAssembler::zext_b(Register Rd, Register Rs) { ++ andi(Rd, Rs, 0xFF); +} + -+// Jump to from_interpreted entry of a call unless single stepping is possible -+// in this thread in which case we must call the i2i entry -+void InterpreterMacroAssembler::jump_from_interpreted(Register method) { -+ prepare_to_jump_from_interpreted(); -+ if (JvmtiExport::can_post_interpreter_events()) { -+ Label run_compiled_code; -+ // JVMTI events, such as single-stepping, are implemented partly by avoiding running -+ // compiled code in threads for which the event is enabled. Check here for -+ // interp_only_mode if these events CAN be enabled. -+ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); -+ beqz(t0, run_compiled_code); -+ ld(t0, Address(method, Method::interpreter_entry_offset())); -+ jr(t0); -+ bind(run_compiled_code); -+ } ++void MacroAssembler::seqz(Register Rd, Register Rs) { ++ sltiu(Rd, Rs, 1); ++} + -+ ld(t0, Address(method, Method::from_interpreted_offset())); -+ jr(t0); ++void MacroAssembler::snez(Register Rd, Register Rs) { ++ sltu(Rd, x0, Rs); +} + -+// The following two routines provide a hook so that an implementation -+// can schedule the dispatch in two parts. amd64 does not do this. -+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++void MacroAssembler::sltz(Register Rd, Register Rs) { ++ slt(Rd, Rs, x0); +} + -+void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { -+ dispatch_next(state, step); ++void MacroAssembler::sgtz(Register Rd, Register Rs) { ++ slt(Rd, x0, Rs); +} + -+void InterpreterMacroAssembler::dispatch_base(TosState state, -+ address* table, -+ bool verifyoop, -+ bool generate_poll, -+ Register Rs) { -+ // Pay attention to the argument Rs, which is acquiesce in t0. 
-+ if (VerifyActivationFrameSize) { -+ Unimplemented(); -+ } -+ if (verifyoop && state == atos) { -+ verify_oop(x10); ++void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) { ++ if (Rd != Rs) { ++ fsgnj_s(Rd, Rs, Rs); + } ++} + -+ Label safepoint; -+ address* const safepoint_table = Interpreter::safept_table(state); -+ bool needs_thread_local_poll = generate_poll && table != safepoint_table; ++void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjx_s(Rd, Rs, Rs); ++} + -+ if (needs_thread_local_poll) { -+ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); -+ ld(t1, Address(xthread, JavaThread::polling_word_offset())); -+ andi(t1, t1, SafepointMechanism::poll_bit()); -+ bnez(t1, safepoint); -+ } -+ if (table == Interpreter::dispatch_table(state)) { -+ li(t1, Interpreter::distance_from_dispatch_table(state)); -+ add(t1, Rs, t1); -+ shadd(t1, t1, xdispatch, t1, 3); -+ } else { -+ mv(t1, (address)table); -+ shadd(t1, Rs, t1, Rs, 3); -+ } -+ ld(t1, Address(t1)); -+ jr(t1); ++void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjn_s(Rd, Rs, Rs); ++} + -+ if (needs_thread_local_poll) { -+ bind(safepoint); -+ la(t1, ExternalAddress((address)safepoint_table)); -+ shadd(t1, Rs, t1, Rs, 3); -+ ld(t1, Address(t1)); -+ jr(t1); ++void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) { ++ if (Rd != Rs) { ++ fsgnj_d(Rd, Rs, Rs); + } +} + -+void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll, Register Rs) { -+ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll, Rs); ++void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjx_d(Rd, Rs, Rs); +} + -+void InterpreterMacroAssembler::dispatch_only_normal(TosState state, Register Rs) { -+ dispatch_base(state, Interpreter::normal_table(state), Rs); ++void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjn_d(Rd, Rs, Rs); +} + -+void InterpreterMacroAssembler::dispatch_only_noverify(TosState state, Register Rs) { -+ dispatch_base(state, Interpreter::normal_table(state), false, Rs); ++void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) { ++ vmnand_mm(vd, vs, vs); +} + -+void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { -+ // load next bytecode -+ load_unsigned_byte(t0, Address(xbcp, step)); -+ add(xbcp, xbcp, step); -+ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) { ++ vnsrl_wx(vd, vs, x0, vm); +} + -+void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { -+ // load current bytecode -+ lbu(t0, Address(xbcp, 0)); -+ dispatch_base(state, table); ++void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { ++ vfsgnjn_vv(vd, vs, vs); +} + -+// remove activation -+// -+// Apply stack watermark barrier. -+// Unlock the receiver if this is a synchronized method. -+// Unlock any Java monitors from syncronized blocks. -+// Remove the activation from the stack. 
-+// -+// If there are locked Java monitors -+// If throw_monitor_exception -+// throws IllegalMonitorStateException -+// Else if install_monitor_exception -+// installs IllegalMonitorStateException -+// Else -+// no error processing -+void InterpreterMacroAssembler::remove_activation( -+ TosState state, -+ bool throw_monitor_exception, -+ bool install_monitor_exception, -+ bool notify_jvmdi) { -+ // Note: Registers x13 may be in use for the -+ // result check if synchronized method -+ Label unlocked, unlock, no_unlock; -+ -+ // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, -+ // that would normally not be safe to use. Such bad returns into unsafe territory of -+ // the stack, will call InterpreterRuntime::at_unwind. -+ Label slow_path; -+ Label fast_path; -+ safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); -+ j(fast_path); -+ -+ bind(slow_path); -+ push(state); -+ set_last_Java_frame(esp, fp, (address)pc(), t0); -+ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), xthread); -+ reset_last_Java_frame(true); -+ pop(state); ++void MacroAssembler::la(Register Rd, const address &dest) { ++ int64_t offset = dest - pc(); ++ if (is_offset_in_range(offset, 32)) { ++ auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit ++ addi(Rd, Rd, ((int64_t)offset << 52) >> 52); ++ } else { ++ movptr(Rd, dest); ++ } ++} + -+ bind(fast_path); ++void MacroAssembler::la(Register Rd, const Address &adr) { ++ InstructionMark im(this); ++ code_section()->relocate(inst_mark(), adr.rspec()); ++ relocInfo::relocType rtype = adr.rspec().reloc()->type(); + -+ // get the value of _do_not_unlock_if_synchronized into x13 -+ const Address do_not_unlock_if_synchronized(xthread, -+ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); -+ lbu(x13, do_not_unlock_if_synchronized); -+ sb(zr, do_not_unlock_if_synchronized); // reset the flag ++ switch (adr.getMode()) { ++ case Address::literal: { ++ if (rtype == relocInfo::none) { ++ li(Rd, (intptr_t)(adr.target())); ++ } else { ++ movptr(Rd, adr.target()); ++ } ++ break; ++ } ++ case Address::base_plus_offset: { ++ int32_t offset = 0; ++ baseOffset(Rd, adr, offset); ++ addi(Rd, Rd, offset); ++ break; ++ } ++ default: ++ ShouldNotReachHere(); ++ } ++} + -+ // get method access flags -+ ld(x11, Address(fp, frame::interpreter_frame_method_offset * wordSize)); -+ ld(x12, Address(x11, Method::access_flags_offset())); -+ andi(t0, x12, JVM_ACC_SYNCHRONIZED); -+ beqz(t0, unlocked); ++void MacroAssembler::la(Register Rd, Label &label) { ++ la(Rd, target(label)); ++} + -+ // Don't unlock anything if the _do_not_unlock_if_synchronized flag -+ // is set. -+ bnez(x13, no_unlock); ++#define INSN(NAME) \ ++ void MacroAssembler::NAME##z(Register Rs, const address &dest) { \ ++ NAME(Rs, zr, dest); \ ++ } \ ++ void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ ++ NAME(Rs, zr, l, is_far); \ ++ } \ + -+ // unlock monitor -+ push(state); // save result ++ INSN(beq); ++ INSN(bne); ++ INSN(blt); ++ INSN(ble); ++ INSN(bge); ++ INSN(bgt); + -+ // BasicObjectLock will be first in list, since this is a -+ // synchronized method. However, need to check that the object has -+ // not been unlocked by an explicit monitorexit bytecode. 
-+ const Address monitor(fp, frame::interpreter_frame_initial_sp_offset * -+ wordSize - (int) sizeof(BasicObjectLock)); -+ // We use c_rarg1 so that if we go slow path it will be the correct -+ // register for unlock_object to pass to VM directly -+ la(c_rarg1, monitor); // address of first monitor ++#undef INSN + -+ ld(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); -+ bnez(x10, unlock); ++// Float compare branch instructions + -+ pop(state); -+ if (throw_monitor_exception) { -+ // Entry already unlocked, need to throw exception -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_illegal_monitor_state_exception)); -+ should_not_reach_here(); -+ } else { -+ // Monitor already unlocked during a stack unroll. If requested, -+ // install an illegal_monitor_state_exception. Continue with -+ // stack unrolling. -+ if (install_monitor_exception) { -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::new_illegal_monitor_state_exception)); -+ } -+ j(unlocked); ++#define INSN(NAME, FLOATCMP, BRANCH) \ ++ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ ++ FLOATCMP##_s(t0, Rs1, Rs2); \ ++ BRANCH(t0, l, is_far); \ ++ } \ ++ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ ++ FLOATCMP##_d(t0, Rs1, Rs2); \ ++ BRANCH(t0, l, is_far); \ + } + -+ bind(unlock); -+ unlock_object(c_rarg1); -+ pop(state); ++ INSN(beq, feq, bnez); ++ INSN(bne, feq, beqz); + -+ // Check that for block-structured locking (i.e., that all locked -+ // objects has been unlocked) -+ bind(unlocked); ++#undef INSN + -+ // x10: Might contain return value + -+ // Check that all monitors are unlocked -+ { -+ Label loop, exception, entry, restart; -+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; -+ const Address monitor_block_top( -+ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ const Address monitor_block_bot( -+ fp, frame::interpreter_frame_initial_sp_offset * wordSize); ++#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ ++ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ if (is_unordered) { \ ++ /* jump if either source is NaN or condition is expected */ \ ++ FLOATCMP2##_s(t0, Rs2, Rs1); \ ++ beqz(t0, l, is_far); \ ++ } else { \ ++ /* jump if no NaN in source and condition is expected */ \ ++ FLOATCMP1##_s(t0, Rs1, Rs2); \ ++ bnez(t0, l, is_far); \ ++ } \ ++ } \ ++ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ if (is_unordered) { \ ++ /* jump if either source is NaN or condition is expected */ \ ++ FLOATCMP2##_d(t0, Rs2, Rs1); \ ++ beqz(t0, l, is_far); \ ++ } else { \ ++ /* jump if no NaN in source and condition is expected */ \ ++ FLOATCMP1##_d(t0, Rs1, Rs2); \ ++ bnez(t0, l, is_far); \ ++ } \ ++ } + -+ bind(restart); -+ // We use c_rarg1 so that if we go slow path it will be the correct -+ // register for unlock_object to pass to VM directly -+ ld(c_rarg1, monitor_block_top); // points to current entry, starting -+ // with top-most entry -+ la(x9, monitor_block_bot); // points to word before bottom of -+ // monitor block ++ INSN(ble, fle, flt); ++ INSN(blt, flt, fle); + -+ j(entry); ++#undef INSN + -+ // Entry already locked, need to throw exception -+ bind(exception); ++#define INSN(NAME, CMP) \ ++ void MacroAssembler::float_##NAME(FloatRegister 
Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ ++ } \ ++ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ ++ } + -+ if (throw_monitor_exception) { -+ // Throw exception -+ MacroAssembler::call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime:: -+ throw_illegal_monitor_state_exception)); ++ INSN(bgt, blt); ++ INSN(bge, ble); + -+ should_not_reach_here(); -+ } else { -+ // Stack unrolling. Unlock object and install illegal_monitor_exception. -+ // Unlock does not block, so don't have to worry about the frame. -+ // We don't have to preserve c_rarg1 since we are going to throw an exception. ++#undef INSN + -+ push(state); -+ unlock_object(c_rarg1); -+ pop(state); + -+ if (install_monitor_exception) { -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime:: -+ new_illegal_monitor_state_exception)); -+ } ++#define INSN(NAME, CSR) \ ++ void MacroAssembler::NAME(Register Rd) { \ ++ csrr(Rd, CSR); \ ++ } + -+ j(restart); -+ } ++ INSN(rdinstret, CSR_INSTERT); ++ INSN(rdcycle, CSR_CYCLE); ++ INSN(rdtime, CSR_TIME); ++ INSN(frcsr, CSR_FCSR); ++ INSN(frrm, CSR_FRM); ++ INSN(frflags, CSR_FFLAGS); + -+ bind(loop); -+ // check if current entry is used -+ add(t0, c_rarg1, BasicObjectLock::obj_offset_in_bytes()); -+ ld(t0, Address(t0, 0)); -+ bnez(t0, exception); ++#undef INSN + -+ add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry -+ bind(entry); -+ bne(c_rarg1, x9, loop); // check if bottom reached if not at bottom then check this entry ++void MacroAssembler::csrr(Register Rd, unsigned csr) { ++ csrrs(Rd, csr, x0); ++} ++ ++#define INSN(NAME, OPFUN) \ ++ void MacroAssembler::NAME(unsigned csr, Register Rs) { \ ++ OPFUN(x0, csr, Rs); \ + } + -+ bind(no_unlock); ++ INSN(csrw, csrrw); ++ INSN(csrs, csrrs); ++ INSN(csrc, csrrc); + -+ // jvmti support -+ if (notify_jvmdi) { -+ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++#undef INSN + -+ } else { -+ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++#define INSN(NAME, OPFUN) \ ++ void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ ++ OPFUN(x0, csr, imm); \ + } + -+ // remove activation -+ // get sender esp -+ ld(t1, -+ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); -+ if (StackReservedPages > 0) { -+ // testing if reserved zone needs to be re-enabled -+ Label no_reserved_zone_enabling; ++ INSN(csrwi, csrrwi); ++ INSN(csrsi, csrrsi); ++ INSN(csrci, csrrci); + -+ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); -+ ble(t1, t0, no_reserved_zone_enabling); ++#undef INSN + -+ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), xthread); -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_delayed_StackOverflowError)); -+ should_not_reach_here(); ++#define INSN(NAME, CSR) \ ++ void MacroAssembler::NAME(Register Rd, Register Rs) { \ ++ csrrw(Rd, CSR, Rs); \ ++ } + -+ bind(no_reserved_zone_enabling); ++ INSN(fscsr, CSR_FCSR); ++ INSN(fsrm, CSR_FRM); ++ INSN(fsflags, CSR_FFLAGS); ++ ++#undef INSN ++ ++#define INSN(NAME) \ ++ void MacroAssembler::NAME(Register Rs) { \ ++ NAME(x0, Rs); \ + } + -+ // restore sender esp -+ mv(esp, t1); ++ INSN(fscsr); ++ INSN(fsrm); ++ INSN(fsflags); + -+ // remove frame anchor -+ leave(); -+ // If we're returning to interpreted code we will shortly be -+ // 
adjusting SP to allow some space for ESP. If we're returning to -+ // compiled code the saved sender SP was saved in sender_sp, so this -+ // restores it. -+ andi(sp, esp, -16); ++#undef INSN ++ ++void MacroAssembler::fsrmi(Register Rd, unsigned imm) { ++ guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register"); ++ csrrwi(Rd, CSR_FRM, imm); +} + -+// Lock object -+// -+// Args: -+// c_rarg1: BasicObjectLock to be used for locking -+// -+// Kills: -+// x10 -+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs) -+// t0, t1 (temp regs) -+void InterpreterMacroAssembler::lock_object(Register lock_reg) -+{ -+ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1"); -+ if (UseHeavyMonitors) { -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), -+ lock_reg); -+ } else { -+ Label done; -+ -+ const Register swap_reg = x10; -+ const Register tmp = c_rarg2; -+ const Register obj_reg = c_rarg3; // Will contain the oop -+ -+ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); -+ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); -+ const int mark_offset = lock_offset + -+ BasicLock::displaced_header_offset_in_bytes(); -+ -+ Label slow_case; -+ -+ // Load object pointer into obj_reg c_rarg3 -+ ld(obj_reg, Address(lock_reg, obj_offset)); -+ -+ if (DiagnoseSyncOnValueBasedClasses != 0) { -+ load_klass(tmp, obj_reg); -+ lwu(tmp, Address(tmp, Klass::access_flags_offset())); -+ andi(tmp, tmp, JVM_ACC_IS_VALUE_BASED_CLASS); -+ bnez(tmp, slow_case); -+ } ++void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { ++ csrrwi(Rd, CSR_FFLAGS, imm); ++} + -+ // Load (object->mark() | 1) into swap_reg -+ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ ori(swap_reg, t0, 1); ++#define INSN(NAME) \ ++ void MacroAssembler::NAME(unsigned imm) { \ ++ NAME(x0, imm); \ ++ } + -+ // Save (object->mark() | 1) into BasicLock's displaced header -+ sd(swap_reg, Address(lock_reg, mark_offset)); ++ INSN(fsrmi); ++ INSN(fsflagsi); + -+ assert(lock_offset == 0, -+ "displached header must be first word in BasicObjectLock"); ++#undef INSN + -+ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++void MacroAssembler::push_reg(Register Rs) ++{ ++ addi(esp, esp, 0 - wordSize); ++ sd(Rs, Address(esp, 0)); ++} + -+ // Test if the oopMark is an obvious stack pointer, i.e., -+ // 1) (mark & 7) == 0, and -+ // 2) sp <= mark < mark + os::pagesize() -+ // -+ // These 3 tests can be done by evaluating the following -+ // expression: ((mark - sp) & (7 - os::vm_page_size())), -+ // assuming both stack pointer and pagesize have their -+ // least significant 3 bits clear. -+ // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg -+ sub(swap_reg, swap_reg, sp); -+ li(t0, (int64_t)(7 - os::vm_page_size())); -+ andr(swap_reg, swap_reg, t0); ++void MacroAssembler::pop_reg(Register Rd) ++{ ++ ld(Rd, esp, 0); ++ addi(esp, esp, wordSize); ++} + -+ // Save the test result, for recursive case, the result is zero -+ sd(swap_reg, Address(lock_reg, mark_offset)); -+ beqz(swap_reg, done); ++int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { ++ int count = 0; ++ // Scan bitset to accumulate register pairs ++ for (int reg = 31; reg >= 0; reg--) { ++ if ((1U << 31) & bitset) { ++ regs[count++] = reg; ++ } ++ bitset <<= 1; ++ } ++ return count; ++} + -+ bind(slow_case); ++// Push lots of registers in the bit set supplied. Don't push sp. 
++// Return the number of words pushed ++int MacroAssembler::push_reg(unsigned int bitset, Register stack) { ++ DEBUG_ONLY(int words_pushed = 0;) ++ CompressibleRegion cr(this); + -+ // Call the runtime routine for slow case -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), -+ lock_reg); ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ // reserve one slot to align for odd count ++ int offset = is_even(count) ? 0 : wordSize; + -+ bind(done); ++ if (count) { ++ addi(stack, stack, - count * wordSize - offset); ++ } ++ for (int i = count - 1; i >= 0; i--) { ++ sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); ++ DEBUG_ONLY(words_pushed ++;) + } -+} -+ + -+// Unlocks an object. Used in monitorexit bytecode and -+// remove_activation. Throws an IllegalMonitorException if object is -+// not locked by current thread. -+// -+// Args: -+// c_rarg1: BasicObjectLock for lock -+// -+// Kills: -+// x10 -+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) -+// t0, t1 (temp regs) -+void InterpreterMacroAssembler::unlock_object(Register lock_reg) -+{ -+ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); ++ assert(words_pushed == count, "oops, pushed != count"); + -+ if (UseHeavyMonitors) { -+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); -+ } else { -+ Label done; ++ return count; ++} + -+ const Register swap_reg = x10; -+ const Register header_reg = c_rarg2; // Will contain the old oopMark -+ const Register obj_reg = c_rarg3; // Will contain the oop ++int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { ++ DEBUG_ONLY(int words_popped = 0;) ++ CompressibleRegion cr(this); + -+ save_bcp(); // Save in case of exception ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ // reserve one slot to align for odd count ++ int offset = is_even(count) ? 0 : wordSize; + -+ // Convert from BasicObjectLock structure to object and BasicLock -+ // structure Store the BasicLock address into x10 -+ la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); ++ for (int i = count - 1; i >= 0; i--) { ++ ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); ++ DEBUG_ONLY(words_popped ++;) ++ } + -+ // Load oop into obj_reg(c_rarg3) -+ ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); ++ if (count) { ++ addi(stack, stack, count * wordSize + offset); ++ } ++ assert(words_popped == count, "oops, popped != count"); + -+ // Free entry -+ sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); ++ return count; ++} + -+ // Load the old header from BasicLock structure -+ ld(header_reg, Address(swap_reg, -+ BasicLock::displaced_header_offset_in_bytes())); ++// Push float registers in the bitset, except sp. ++// Return the number of heapwords pushed. 
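++// The slot count is rounded up to an even number of words (push_slots/pop_slots below), presumably to keep sp 16-byte aligned.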
++int MacroAssembler::push_fp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int words_pushed = 0; ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ int push_slots = count + (count & 1); + -+ // Test for recursion -+ beqz(header_reg, done); ++ if (count) { ++ addi(stack, stack, -push_slots * wordSize); ++ } + -+ // Atomic swap back the old header -+ cmpxchg_obj_header(swap_reg, header_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++ for (int i = count - 1; i >= 0; i--) { ++ fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize)); ++ words_pushed++; ++ } + -+ // Call the runtime routine for slow case. -+ sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj -+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); ++ return count; ++} + -+ bind(done); ++int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int words_popped = 0; ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ int pop_slots = count + (count & 1); + -+ restore_bcp(); ++ for (int i = count - 1; i >= 0; i--) { ++ fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize)); ++ words_popped++; + } -+} + ++ if (count) { ++ addi(stack, stack, pop_slots * wordSize); ++ } + -+void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, -+ Label& zero_continue) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ ld(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); -+ beqz(mdp, zero_continue); ++ assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count); ++ return count; +} + -+// Set the method data pointer for the current bcp. -+void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ Label set_mdp; -+ push_reg(0xc00, sp); // save x10, x11 ++void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { ++ CompressibleRegion cr(this); ++ // Push integer registers x7, x10-x17, x28-x31. ++ push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); + -+ // Test MDO to avoid the call if it is NULL. -+ ld(x10, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ beqz(x10, set_mdp); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), xmethod, xbcp); -+ // x10: mdi -+ // mdo is guaranteed to be non-zero here, we checked for it before the call. -+ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ la(x11, Address(x11, in_bytes(MethodData::data_offset()))); -+ add(x10, x11, x10); -+ sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); -+ bind(set_mdp); -+ pop_reg(0xc00, sp); ++ // Push float registers f0-f7, f10-f17, f28-f31. 
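++ // That is 20 registers in total (8 + 8 + 4), which matches the 20-word stack adjustment below.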
++ addi(sp, sp, - wordSize * 20); ++ int offset = 0; ++ for (int i = 0; i < 32; i++) { ++ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { ++ fsd(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); ++ } ++ } +} + -+void InterpreterMacroAssembler::verify_method_data_pointer() { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+#ifdef ASSERT -+ Label verify_continue; -+ add(sp, sp, -4 * wordSize); -+ sd(x10, Address(sp, 0)); -+ sd(x11, Address(sp, wordSize)); -+ sd(x12, Address(sp, 2 * wordSize)); -+ sd(x13, Address(sp, 3 * wordSize)); -+ test_method_data_pointer(x13, verify_continue); // If mdp is zero, continue -+ get_method(x11); ++void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { ++ CompressibleRegion cr(this); ++ int offset = 0; ++ for (int i = 0; i < 32; i++) { ++ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { ++ fld(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); ++ } ++ } ++ addi(sp, sp, wordSize * 20); + -+ // If the mdp is valid, it will point to a DataLayout header which is -+ // consistent with the bcp. The converse is highly probable also. -+ lh(x12, Address(x13, in_bytes(DataLayout::bci_offset()))); -+ ld(t0, Address(x11, Method::const_offset())); -+ add(x12, x12, t0); -+ la(x12, Address(x12, ConstMethod::codes_offset())); -+ beq(x12, xbcp, verify_continue); -+ // x10: method -+ // xbcp: bcp // xbcp == 22 -+ // x13: mdp -+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), -+ x11, xbcp, x13); -+ bind(verify_continue); -+ ld(x10, Address(sp, 0)); -+ ld(x11, Address(sp, wordSize)); -+ ld(x12, Address(sp, 2 * wordSize)); -+ ld(x13, Address(sp, 3 * wordSize)); -+ add(sp, sp, 4 * wordSize); -+#endif // ASSERT ++ pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); +} + ++// Push all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). ++void MacroAssembler::pusha() { ++ CompressibleRegion cr(this); ++ push_reg(0xffffffe2, sp); ++} + -+void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, -+ int constant, -+ Register value) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ Address data(mdp_in, constant); -+ sd(value, data); ++// Pop all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). 
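++// The mask 0xffffffe2 encodes that set: bits 0, 2, 3 and 4 are clear, so x1 and x5-x31 are saved/restored.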
++void MacroAssembler::popa() { ++ CompressibleRegion cr(this); ++ pop_reg(0xffffffe2, sp); +} + ++void MacroAssembler::push_CPU_state() { ++ CompressibleRegion cr(this); ++ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) ++ push_reg(0xffffffe0, sp); + -+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, -+ int constant, -+ bool decrement) { -+ increment_mdp_data_at(mdp_in, noreg, constant, decrement); ++ // float registers ++ addi(sp, sp, - 32 * wordSize); ++ for (int i = 0; i < 32; i++) { ++ fsd(as_FloatRegister(i), Address(sp, i * wordSize)); ++ } +} + -+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, -+ Register reg, -+ int constant, -+ bool decrement) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ // %%% this does 64bit counters at best it is wasting space -+ // at worst it is a rare bug when counters overflow -+ -+ assert_different_registers(t1, t0, mdp_in, reg); ++void MacroAssembler::pop_CPU_state() { ++ CompressibleRegion cr(this); + -+ Address addr1(mdp_in, constant); -+ Address addr2(t1, 0); -+ Address &addr = addr1; -+ if (reg != noreg) { -+ la(t1, addr1); -+ add(t1, t1, reg); -+ addr = addr2; ++ // float registers ++ for (int i = 0; i < 32; i++) { ++ fld(as_FloatRegister(i), Address(sp, i * wordSize)); + } ++ addi(sp, sp, 32 * wordSize); + -+ if (decrement) { -+ ld(t0, addr); -+ addi(t0, t0, -DataLayout::counter_increment); -+ Label L; -+ bltz(t0, L); // skip store if counter underflow -+ sd(t0, addr); -+ bind(L); -+ } else { -+ assert(DataLayout::counter_increment == 1, -+ "flow-free idiom only works with 1"); -+ ld(t0, addr); -+ addi(t0, t0, DataLayout::counter_increment); -+ Label L; -+ blez(t0, L); // skip store if counter overflow -+ sd(t0, addr); -+ bind(L); -+ } ++ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) ++ pop_reg(0xffffffe0, sp); +} + -+void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, -+ int flag_byte_constant) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ int flags_offset = in_bytes(DataLayout::flags_offset()); -+ // Set the flag -+ lbu(t1, Address(mdp_in, flags_offset)); -+ ori(t1, t1, flag_byte_constant); -+ sb(t1, Address(mdp_in, flags_offset)); ++static int patch_offset_in_jal(address branch, int64_t offset) { ++ assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction!\n"); ++ Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31] ++ Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21] ++ Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20] ++ Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12] ++ return NativeInstruction::instruction_size; // only one instruction +} + -+ -+void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, -+ int offset, -+ Register value, -+ Register test_value_out, -+ Label& not_equal_continue) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ if (test_value_out == noreg) { -+ ld(t1, Address(mdp_in, offset)); -+ bne(value, t1, not_equal_continue); -+ } else { -+ // Put the test value into a register, so caller can use it: -+ ld(test_value_out, Address(mdp_in, offset)); -+ bne(value, test_value_out, not_equal_continue); -+ } ++static int patch_offset_in_conditional_branch(address branch, int64_t offset) { ++ assert(is_imm_in_range(offset, 12, 1), "offset is too large to be 
patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n"); ++ Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31] ++ Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25] ++ Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7] ++ Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] ++ return NativeInstruction::instruction_size; // only one instruction +} + ++static int patch_offset_in_pc_relative(address branch, int64_t offset) { ++ const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load ++ Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12] ++ Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20] ++ return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size; ++} + ++static int patch_addr_in_movptr(address branch, address target) { ++ const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load ++ int32_t lower = ((intptr_t)target << 35) >> 35; ++ int64_t upper = ((intptr_t)target - lower) >> 29; ++ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12] ++ Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20] ++ Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20] ++ Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. target[ 5: 0] ==> branch[31:20] ++ return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; +} + ++static int patch_imm_in_li64(address branch, address target) { ++ const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi ++ int64_t lower = (intptr_t)target & 0xffffffff; ++ lower = lower - ((lower << 44) >> 44); ++ int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower; ++ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; ++ int64_t tmp_upper = upper, tmp_lower = upper; ++ tmp_lower = (tmp_lower << 52) >> 52; ++ tmp_upper -= tmp_lower; ++ tmp_upper >>= 12; ++ // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:28] == 0x7ff && target[19] == 1), ++ // upper = target[63:32] + 1. ++ Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui. ++ Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi. ++ // Load the rest 32 bits. ++ Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi. ++ Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi. ++ Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi.
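++ // Note: only the lui and the four addi instructions are patched; the slli instructions at branch + 8, + 16 and + 24 keep their fixed shift amounts, so the sequence is still LI64_INSTRUCTIONS_NUM instructions long.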
++ return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; +} + ++static int patch_imm_in_li32(address branch, int32_t target) { ++ const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw ++ int64_t upper = (intptr_t)target; ++ int32_t lower = (((int32_t)target) << 20) >> 20; ++ upper -= lower; ++ upper = (int32_t)upper; ++ Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui. ++ Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw. ++ return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; ++} + -+void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, -+ int constant) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ addi(mdp_in, mdp_in, (unsigned)constant); -+ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++static long get_offset_of_jal(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ long offset = 0; ++ unsigned insn = *(unsigned*)insn_addr; ++ long val = (long)Assembler::sextract(insn, 31, 12); ++ offset |= ((val >> 19) & 0x1) << 20; ++ offset |= (val & 0xff) << 12; ++ offset |= ((val >> 8) & 0x1) << 11; ++ offset |= ((val >> 9) & 0x3ff) << 1; ++ offset = (offset << 43) >> 43; ++ return offset; +} + ++static long get_offset_of_conditional_branch(address insn_addr) { ++ long offset = 0; ++ assert_cond(insn_addr != NULL); ++ unsigned insn = *(unsigned*)insn_addr; ++ offset = (long)Assembler::sextract(insn, 31, 31); ++ offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); ++ offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); ++ offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); ++ offset = (offset << 41) >> 41; ++ return offset; ++} + -+void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); ++static long get_offset_of_pc_relative(address insn_addr) { ++ long offset = 0; ++ assert_cond(insn_addr != NULL); ++ offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12; // Auipc. ++ offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addi/Jalr/Load. ++ offset = (offset << 32) >> 32; ++ return offset; ++} + -+ // save/restore across call_VM -+ addi(sp, sp, -2 * wordSize); -+ sd(zr, Address(sp, 0)); -+ sd(return_bci, Address(sp, wordSize)); -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), -+ return_bci); -+ ld(zr, Address(sp, 0)); -+ ld(return_bci, Address(sp, wordSize)); -+ addi(sp, sp, 2 * wordSize); ++static address get_target_of_movptr(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 29; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 17; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 6; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. ++ return (address) target_address; +} + -+void InterpreterMacroAssembler::profile_taken_branch(Register mdp, -+ Register bumped_count) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++static address get_target_of_li64(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44; // Lui. 
++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20)); // Addi. ++ return (address)target_address; ++} + -+ // If no method data exists, go to profile_continue. -+ // Otherwise, assign to mdp -+ test_method_data_pointer(mdp, profile_continue); ++static address get_target_of_li32(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addiw. ++ return (address)target_address; ++} + -+ // We are taking a branch. Increment the taken count. -+ Address data(mdp, in_bytes(JumpData::taken_offset())); -+ ld(bumped_count, data); -+ assert(DataLayout::counter_increment == 1, -+ "flow-free idiom only works with 1"); -+ addi(bumped_count, bumped_count, DataLayout::counter_increment); -+ Label L; -+ // eg: bumped_count=0x7fff ffff ffff ffff + 1 < 0. so we use <= 0; -+ blez(bumped_count, L); // skip store if counter overflow, -+ sd(bumped_count, data); -+ bind(L); -+ // The method data pointer needs to be updated to reflect the new target. -+ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); -+ bind(profile_continue); ++// Patch any kind of instruction; there may be several instructions. ++// Return the total length (in bytes) of the instructions. ++int MacroAssembler::pd_patch_instruction_size(address branch, address target) { ++ assert_cond(branch != NULL); ++ int64_t offset = target - branch; ++ if (NativeInstruction::is_jal_at(branch)) { // jal ++ return patch_offset_in_jal(branch, offset); ++ } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne ++ return patch_offset_in_conditional_branch(branch, offset); ++ } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load ++ return patch_offset_in_pc_relative(branch, offset); ++ } else if (NativeInstruction::is_movptr_at(branch)) { // movptr ++ return patch_addr_in_movptr(branch, target); ++ } else if (NativeInstruction::is_li64_at(branch)) { // li64 ++ return patch_imm_in_li64(branch, target); ++ } else if (NativeInstruction::is_li32_at(branch)) { // li32 ++ int64_t imm = (intptr_t)target; ++ return patch_imm_in_li32(branch, (int32_t)imm); ++ } else { ++#ifdef ASSERT ++ tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", ++ *(unsigned*)branch, p2i(branch)); ++ Disassembler::decode(branch - 16, branch + 16); ++#endif ++ ShouldNotReachHere(); ++ return -1; + } +} + -+void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // We are taking a branch. Increment the not taken count. 
-+ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); -+ -+ // The method data pointer needs to be updated to correspond to -+ // the next bytecode -+ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); -+ bind(profile_continue); ++address MacroAssembler::target_addr_for_insn(address insn_addr) { ++ long offset = 0; ++ assert_cond(insn_addr != NULL); ++ if (NativeInstruction::is_jal_at(insn_addr)) { // jal ++ offset = get_offset_of_jal(insn_addr); ++ } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne ++ offset = get_offset_of_conditional_branch(insn_addr); ++ } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load ++ offset = get_offset_of_pc_relative(insn_addr); ++ } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr ++ return get_target_of_movptr(insn_addr); ++ } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64 ++ return get_target_of_li64(insn_addr); ++ } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32 ++ return get_target_of_li32(insn_addr); ++ } else { ++ ShouldNotReachHere(); + } ++ return address(((uintptr_t)insn_addr + offset)); +} + -+void InterpreterMacroAssembler::profile_call(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // We are making a call. Increment the count. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); -+ -+ // The method data pointer needs to be updated to reflect the new target. -+ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); -+ bind(profile_continue); ++int MacroAssembler::patch_oop(address insn_addr, address o) { ++ // OOPs are either narrow (32 bits) or wide (48 bits). We encode ++ // narrow OOPs by setting the upper 16 bits in the first ++ // instruction. ++ if (NativeInstruction::is_li32_at(insn_addr)) { ++ // Move narrow OOP ++ narrowOop n = CompressedOops::encode((oop)o); ++ return patch_imm_in_li32(insn_addr, (int32_t)n); ++ } else if (NativeInstruction::is_movptr_at(insn_addr)) { ++ // Move wide OOP ++ return patch_addr_in_movptr(insn_addr, o); + } ++ ShouldNotReachHere(); ++ return -1; +} + -+void InterpreterMacroAssembler::profile_final_call(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // We are making a call. Increment the count. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); -+ -+ // The method data pointer needs to be updated to reflect the new target. -+ update_mdp_by_constant(mdp, -+ in_bytes(VirtualCallData:: -+ virtual_call_data_size())); -+ bind(profile_continue); ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops) { ++ if (Universe::is_fully_initialized()) { ++ mv(xheapbase, Universe::narrow_ptrs_base()); ++ } else { ++ int32_t offset = 0; ++ la_patchable(xheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()), offset); ++ ld(xheapbase, Address(xheapbase, offset)); ++ } + } +} + -+ -+void InterpreterMacroAssembler::profile_virtual_call(Register receiver, -+ Register mdp, -+ Register reg2, -+ bool receiver_can_be_null) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. 
-+ test_method_data_pointer(mdp, profile_continue); -+ -+ Label skip_receiver_profile; -+ if (receiver_can_be_null) { -+ Label not_null; -+ // We are making a call. Increment the count for null receiver. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); -+ j(skip_receiver_profile); -+ bind(not_null); -+ } -+ -+ // Record the receiver type. -+ record_klass_in_profile(receiver, mdp, reg2, true); -+ bind(skip_receiver_profile); -+ -+ // The method data pointer needs to be updated to reflect the new target. -+ -+ update_mdp_by_constant(mdp, -+ in_bytes(VirtualCallData:: -+ virtual_call_data_size())); -+ bind(profile_continue); -+ } ++void MacroAssembler::mv(Register Rd, Address dest) { ++ assert(dest.getMode() == Address::literal, "Address mode should be Address::literal"); ++ code_section()->relocate(pc(), dest.rspec()); ++ movptr(Rd, dest.target()); +} + -+// This routine creates a state machine for updating the multi-row -+// type profile at a virtual call site (or other type-sensitive bytecode). -+// The machine visits each row (of receiver/count) until the receiver type -+// is found, or until it runs out of rows. At the same time, it remembers -+// the location of the first empty row. (An empty row records null for its -+// receiver, and can be allocated for a newly-observed receiver type.) -+// Because there are two degrees of freedom in the state, a simple linear -+// search will not work; it must be a decision tree. Hence this helper -+// function is recursive, to generate the required tree structured code. -+// It's the interpreter, so we are trading off code space for speed. -+// See below for example code. -+void InterpreterMacroAssembler::record_klass_in_profile_helper( -+ Register receiver, Register mdp, -+ Register reg2, -+ Label& done, bool is_virtual_call) { -+ if (TypeProfileWidth == 0) { -+ if (is_virtual_call) { -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); -+ } ++void MacroAssembler::mv(Register Rd, address addr) { ++ // Here in case of use with relocation, use fix length instruction ++ // movptr instead of li ++ movptr(Rd, addr); ++} + ++void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { ++ if (src.is_register()) { ++ mv(Rd, src.as_register()); + } else { -+ int non_profiled_offset = -1; -+ if (is_virtual_call) { -+ non_profiled_offset = in_bytes(CounterData::count_offset()); -+ } -+ -+ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, -+ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); ++ mv(Rd, src.as_constant()); + } +} + -+void InterpreterMacroAssembler::record_item_in_profile_helper( -+ Register item, Register mdp, Register reg2, int start_row, Label& done, int total_rows, -+ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, int non_profiled_offset) { -+ int last_row = total_rows - 1; -+ assert(start_row <= last_row, "must be work left to do"); -+ // Test this row for both the item and for null. -+ // Take any of three different outcomes: -+ // 1. found item => increment count and goto done -+ // 2. found null => keep looking for case 1, maybe allocate this cell -+ // 3. found something else => keep looking for cases 1 and 2 -+ // Case 3 is handled by a recursive call. -+ for (int row = start_row; row <= last_row; row++) { -+ Label next_test; -+ bool test_for_null_also = (row == start_row); -+ -+ // See if the item is item[n]. 
-+ int item_offset = in_bytes(item_offset_fn(row)); -+ test_mdp_data_at(mdp, item_offset, item, -+ (test_for_null_also ? reg2 : noreg), -+ next_test); -+ // (Reg2 now contains the item from the CallData.) -+ -+ // The item is item[n]. Increment count[n]. -+ int count_offset = in_bytes(item_count_offset_fn(row)); -+ increment_mdp_data_at(mdp, count_offset); -+ j(done); -+ bind(next_test); -+ -+ if (test_for_null_also) { -+ Label found_null; -+ // Failed the equality check on item[n]... Test for null. -+ if (start_row == last_row) { -+ // The only thing left to do is handle the null case. -+ if (non_profiled_offset >= 0) { -+ beqz(reg2, found_null); -+ // Item did not match any saved item and there is no empty row for it. -+ // Increment total counter to indicate polymorphic case. -+ increment_mdp_data_at(mdp, non_profiled_offset); -+ j(done); -+ bind(found_null); -+ } else { -+ bnez(reg2, done); -+ } -+ break; -+ } -+ // Since null is rare, make it be the branch-taken case. -+ beqz(reg2, found_null); -+ -+ // Put all the "Case 3" tests here. -+ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, -+ item_offset_fn, item_count_offset_fn, non_profiled_offset); -+ -+ // Found a null. Keep searching for a matching item, -+ // but remember that this is an empty (unused) slot. -+ bind(found_null); -+ } -+ } -+ -+ // In the fall-through case, we found no matching item, but we -+ // observed the item[start_row] is NULL. -+ // Fill in the item field and increment the count. -+ int item_offset = in_bytes(item_offset_fn(start_row)); -+ set_mdp_data_at(mdp, item_offset, item); -+ int count_offset = in_bytes(item_count_offset_fn(start_row)); -+ mv(reg2, DataLayout::counter_increment); -+ set_mdp_data_at(mdp, count_offset, reg2); -+ if (start_row > 0) { -+ j(done); -+ } ++void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { ++ andr(Rd, Rs1, Rs2); ++ // addw: The result is clipped to 32 bits, then the sign bit is extended, ++ // and the result is stored in Rd ++ addw(Rd, Rd, zr); +} + -+// Example state machine code for three profile rows: -+// # main copy of decision tree, rooted at row[1] -+// if (row[0].rec == rec) then [ -+// row[0].incr() -+// goto done -+// ] -+// if (row[0].rec != NULL) then [ -+// # inner copy of decision tree, rooted at row[1] -+// if (row[1].rec == rec) then [ -+// row[1].incr() -+// goto done -+// ] -+// if (row[1].rec != NULL) then [ -+// # degenerate decision tree, rooted at row[2] -+// if (row[2].rec == rec) then [ -+// row[2].incr() -+// goto done -+// ] -+// if (row[2].rec != NULL) then [ -+// count.incr() -+// goto done -+// ] # overflow -+// row[2].init(rec) -+// goto done -+// ] else [ -+// # remember row[1] is empty -+// if (row[2].rec == rec) then [ -+// row[2].incr() -+// goto done -+// ] -+// row[1].init(rec) -+// goto done -+// ] -+// else [ -+// # remember row[0] is empty -+// if (row[1].rec == rec) then [ -+// row[1].incr() -+// goto done -+// ] -+// if (row[2].rec == rec) then [ -+// row[2].incr() -+// goto done -+// ] -+// row[0].init(rec) -+// goto done -+// ] -+// done: -+ -+void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, -+ Register mdp, Register reg2, -+ bool is_virtual_call) { -+ assert(ProfileInterpreter, "must be profiling"); -+ Label done; -+ -+ record_klass_in_profile_helper(receiver, mdp, reg2, done, is_virtual_call); -+ -+ bind(done); ++void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { ++ orr(Rd, Rs1, Rs2); ++ // addw: The result is clipped to 32 bits, then the 
sign bit is extended, ++ // and the result is stored in Rd ++ addw(Rd, Rd, zr); +} + -+void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // Update the total ret count. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); -+ -+ for (uint row = 0; row < RetData::row_limit(); row++) { -+ Label next_test; ++void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { ++ xorr(Rd, Rs1, Rs2); ++ // addw: The result is clipped to 32 bits, then the sign bit is extended, ++ // and the result is stored in Rd ++ addw(Rd, Rd, zr); ++} + -+ // See if return_bci is equal to bci[n]: -+ test_mdp_data_at(mdp, -+ in_bytes(RetData::bci_offset(row)), -+ return_bci, noreg, -+ next_test); ++// Note: load_unsigned_short used to be called load_unsigned_word. ++int MacroAssembler::load_unsigned_short(Register dst, Address src) { ++ int off = offset(); ++ lhu(dst, src); ++ return off; ++} + -+ // return_bci is equal to bci[n]. Increment the count. -+ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++int MacroAssembler::load_unsigned_byte(Register dst, Address src) { ++ int off = offset(); ++ lbu(dst, src); ++ return off; ++} + -+ // The method data pointer needs to be updated to reflect the new target. -+ update_mdp_by_offset(mdp, -+ in_bytes(RetData::bci_displacement_offset(row))); -+ j(profile_continue); -+ bind(next_test); -+ } ++int MacroAssembler::load_signed_short(Register dst, Address src) { ++ int off = offset(); ++ lh(dst, src); ++ return off; ++} + -+ update_mdp_for_ret(return_bci); ++int MacroAssembler::load_signed_byte(Register dst, Address src) { ++ int off = offset(); ++ lb(dst, src); ++ return off; ++} + -+ bind(profile_continue); ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld(dst, src); break; ++ case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; ++ case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; ++ case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; ++ default: ShouldNotReachHere(); + } +} + -+void InterpreterMacroAssembler::profile_null_seen(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); -+ -+ // The method data pointer needs to be updated. -+ int mdp_delta = in_bytes(BitData::bit_data_size()); -+ if (TypeProfileCasts) { -+ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); -+ } -+ update_mdp_by_constant(mdp, mdp_delta); -+ -+ bind(profile_continue); ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: sd(src, dst); break; ++ case 4: sw(src, dst); break; ++ case 2: sh(src, dst); break; ++ case 1: sb(src, dst); break; ++ default: ShouldNotReachHere(); + } +} + -+void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { -+ if (ProfileInterpreter && TypeProfileCasts) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. 
-+ test_method_data_pointer(mdp, profile_continue); -+ -+ int count_offset = in_bytes(CounterData::count_offset()); -+ // Back up the address, since we have already bumped the mdp. -+ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); -+ -+ // *Decrement* the counter. We expect to see zero or small negatives. -+ increment_mdp_data_at(mdp, count_offset, true); -+ -+ bind (profile_continue); ++// reverse bytes in halfword in lower 16 bits and sign-extend ++// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) ++void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ srai(Rd, Rd, 48); ++ return; + } ++ assert_different_registers(Rs, tmp); ++ assert_different_registers(Rd, tmp); ++ srli(tmp, Rs, 8); ++ andi(tmp, tmp, 0xFF); ++ slli(Rd, Rs, 56); ++ srai(Rd, Rd, 48); // sign-extend ++ orr(Rd, Rd, tmp); +} + -+void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // The method data pointer needs to be updated. -+ int mdp_delta = in_bytes(BitData::bit_data_size()); -+ if (TypeProfileCasts) { -+ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); -+ -+ // Record the object type. -+ record_klass_in_profile(klass, mdp, reg2, false); -+ } -+ update_mdp_by_constant(mdp, mdp_delta); -+ -+ bind(profile_continue); ++// reverse bytes in lower word and sign-extend ++// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) ++void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ srai(Rd, Rd, 32); ++ return; + } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ revb_h_w_u(Rd, Rs, tmp1, tmp2); ++ slli(tmp2, Rd, 48); ++ srai(tmp2, tmp2, 32); // sign-extend ++ srli(Rd, Rd, 16); ++ orr(Rd, Rd, tmp2); +} + -+void InterpreterMacroAssembler::profile_switch_default(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // Update the default case count -+ increment_mdp_data_at(mdp, -+ in_bytes(MultiBranchData::default_count_offset())); -+ -+ // The method data pointer needs to be updated. 
-+ update_mdp_by_offset(mdp, -+ in_bytes(MultiBranchData:: -+ default_displacement_offset())); ++// reverse bytes in halfword in lower 16 bits and zero-extend ++// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) ++void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ srli(Rd, Rd, 48); ++ return; ++ } ++ assert_different_registers(Rs, tmp); ++ assert_different_registers(Rd, tmp); ++ srli(tmp, Rs, 8); ++ andi(tmp, tmp, 0xFF); ++ andi(Rd, Rs, 0xFF); ++ slli(Rd, Rd, 8); ++ orr(Rd, Rd, tmp); ++} + -+ bind(profile_continue); ++// reverse bytes in halfwords in lower 32 bits and zero-extend ++// Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) ++void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ rori(Rd, Rd, 32); ++ roriw(Rd, Rd, 16); ++ zero_extend(Rd, Rd, 32); ++ return; + } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ srli(tmp2, Rs, 16); ++ revb_h_h_u(tmp2, tmp2, tmp1); ++ revb_h_h_u(Rd, Rs, tmp1); ++ slli(tmp2, tmp2, 16); ++ orr(Rd, Rd, tmp2); +} + -+void InterpreterMacroAssembler::profile_switch_case(Register index, -+ Register mdp, -+ Register reg2) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // Build the base (index * per_case_size_in_bytes()) + -+ // case_array_offset_in_bytes() -+ mvw(reg2, in_bytes(MultiBranchData::per_case_size())); -+ mvw(t0, in_bytes(MultiBranchData::case_array_offset())); -+ Assembler::mul(index, index, reg2); -+ Assembler::add(index, index, t0); -+ -+ // Update the case count -+ increment_mdp_data_at(mdp, -+ index, -+ in_bytes(MultiBranchData::relative_count_offset())); -+ -+ // The method data pointer need to be updated. -+ update_mdp_by_offset(mdp, -+ index, -+ in_bytes(MultiBranchData:: -+ relative_displacement_offset())); ++// This method is only used for revb_h ++// Rd = Rs[47:0] Rs[55:48] Rs[63:56] ++void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1); ++ srli(tmp1, Rs, 48); ++ andi(tmp2, tmp1, 0xFF); ++ slli(tmp2, tmp2, 8); ++ srli(tmp1, tmp1, 8); ++ orr(tmp1, tmp1, tmp2); ++ slli(Rd, Rs, 16); ++ orr(Rd, Rd, tmp1); ++} + -+ bind(profile_continue); ++// reverse bytes in each halfword ++// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] ++void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ assert_different_registers(Rs, tmp1); ++ assert_different_registers(Rd, tmp1); ++ rev8(Rd, Rs); ++ zero_extend(tmp1, Rd, 32); ++ roriw(tmp1, tmp1, 16); ++ slli(tmp1, tmp1, 32); ++ srli(Rd, Rd, 32); ++ roriw(Rd, Rd, 16); ++ zero_extend(Rd, Rd, 32); ++ orr(Rd, Rd, tmp1); ++ return; ++ } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ revb_h_helper(Rd, Rs, tmp1, tmp2); ++ for (int i = 0; i < 3; ++i) { ++ revb_h_helper(Rd, Rd, tmp1, tmp2); + } +} + -+void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } -+ -+void InterpreterMacroAssembler::notify_method_entry() { -+ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to -+ // track stack depth. 
If it is possible to enter interp_only_mode we add -+ // the code to check if the event should be sent. -+ if (JvmtiExport::can_post_interpreter_events()) { -+ Label L; -+ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); -+ beqz(x13, L); -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::post_method_entry)); -+ bind(L); ++// reverse bytes in each word ++// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] ++void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ rori(Rd, Rd, 32); ++ return; + } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ revb(Rd, Rs, tmp1, tmp2); ++ ror_imm(Rd, Rd, 32); ++} + -+ { -+ SkipIfEqual skip(this, &DTraceMethodProbes, false); -+ get_method(c_rarg1); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), -+ xthread, c_rarg1); ++// reverse bytes in doubleword ++// Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] ++void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ return; + } -+ -+ // RedefineClasses() tracing support for obsolete method entry -+ if (log_is_enabled(Trace, redefine, class, obsolete)) { -+ get_method(c_rarg1); -+ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), -+ xthread, c_rarg1); ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ andi(tmp1, Rs, 0xFF); ++ slli(tmp1, tmp1, 8); ++ for (int step = 8; step < 56; step += 8) { ++ srli(tmp2, Rs, step); ++ andi(tmp2, tmp2, 0xFF); ++ orr(tmp1, tmp1, tmp2); ++ slli(tmp1, tmp1, 8); + } ++ srli(Rd, Rs, 56); ++ andi(Rd, Rd, 0xFF); ++ orr(Rd, tmp1, Rd); +} + ++// rotate right with shift bits ++void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) ++{ ++ if (UseZbb) { ++ rori(dst, src, shift); ++ return; ++ } + -+void InterpreterMacroAssembler::notify_method_exit( -+ TosState state, NotifyMethodExitMode mode) { -+ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to -+ // track stack depth. If it is possible to enter interp_only_mode we add -+ // the code to check if the event should be sent. -+ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { -+ Label L; -+ // Note: frame::interpreter_frame_result has a dependency on how the -+ // method result is saved across the call to post_method_exit. If this -+ // is changed then the interpreter_frame_result implementation will -+ // need to be updated too. ++ assert_different_registers(dst, tmp); ++ assert_different_registers(src, tmp); ++ assert(shift < 64, "shift amount must be < 64"); ++ slli(tmp, src, 64 - shift); ++ srli(dst, src, shift); ++ orr(dst, dst, tmp); ++} + -+ // template interpreter will leave the result on the top of the stack. 
-+ push(state); -+ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); -+ beqz(x13, L); -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); -+ bind(L); -+ pop(state); ++void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { ++ if (is_imm_in_range(imm, 12, 0)) { ++ and_imm12(Rd, Rn, imm); ++ } else { ++ assert_different_registers(Rn, tmp); ++ li(tmp, imm); ++ andr(Rd, Rn, tmp); + } ++} + -+ { -+ SkipIfEqual skip(this, &DTraceMethodProbes, false); -+ push(state); -+ get_method(c_rarg1); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), -+ xthread, c_rarg1); -+ pop(state); ++void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { ++ ld(tmp1, adr); ++ if (src.is_register()) { ++ orr(tmp1, tmp1, src.as_register()); ++ } else { ++ if (is_imm_in_range(src.as_constant(), 12, 0)) { ++ ori(tmp1, tmp1, src.as_constant()); ++ } else { ++ assert_different_registers(tmp1, tmp2); ++ li(tmp2, src.as_constant()); ++ orr(tmp1, tmp1, tmp2); ++ } + } ++ sd(tmp1, adr); +} + -+ -+// Jump if ((*counter_addr += increment) & mask) satisfies the condition. -+void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, -+ int increment, Address mask, -+ Register tmp1, Register tmp2, -+ bool preloaded, Label* where) { -+ Label done; -+ if (!preloaded) { -+ lwu(tmp1, counter_addr); ++void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { ++ if (UseCompressedClassPointers) { ++ lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); ++ if (Universe::narrow_klass_base() == NULL) { ++ slli(tmp, tmp, Universe::narrow_klass_shift()); ++ beq(trial_klass, tmp, L); ++ return; ++ } ++ decode_klass_not_null(tmp); ++ } else { ++ ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); + } -+ add(tmp1, tmp1, increment); -+ sw(tmp1, counter_addr); -+ lwu(tmp2, mask); -+ andr(tmp1, tmp1, tmp2); -+ bnez(tmp1, done); -+ j(*where); // offset is too large so we have to use j instead of beqz here -+ bind(done); ++ beq(trial_klass, tmp, L); +} + -+void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, -+ int number_of_arguments) { -+ // interpreter specific -+ // -+ // Note: No need to save/restore rbcp & rlocals pointer since these -+ // are callee saved registers and no blocking/ GC can happen -+ // in leaf calls. ++// Move an oop into a register. immediate is true if we want ++// immediate instructions, i.e. we are not going to patch this ++// instruction while the code is being executed by another thread. In ++// that case we can use move immediates rather than the constant pool. 
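++// A patchable (non-immediate) oop is instead loaded from a nearby constant slot via ld_constant, see below.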
++void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { ++ int oop_index; ++ if (obj == NULL) { ++ oop_index = oop_recorder()->allocate_oop_index(obj); ++ } else { +#ifdef ASSERT -+ { -+ Label L; -+ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ beqz(t0, L); -+ stop("InterpreterMacroAssembler::call_VM_leaf_base:" -+ " last_sp != NULL"); -+ bind(L); ++ { ++ ThreadInVMfromUnknown tiv; ++ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); ++ } ++#endif ++ oop_index = oop_recorder()->find_index(obj); + } -+#endif /* ASSERT */ -+ // super call -+ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ if (!immediate) { ++ address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address ++ ld_constant(dst, Address(dummy, rspec)); ++ } else ++ mv(dst, Address((address)obj, rspec)); +} + -+void InterpreterMacroAssembler::call_VM_base(Register oop_result, -+ Register java_thread, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments, -+ bool check_exceptions) { -+ // interpreter specific -+ // -+ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't -+ // really make a difference for these runtime calls, since they are -+ // slow anyway. Btw., bcp must be saved/restored since it may change -+ // due to GC. -+ save_bcp(); -+#ifdef ASSERT -+ { -+ Label L; -+ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ beqz(t0, L); -+ stop("InterpreterMacroAssembler::call_VM_base:" -+ " last_sp != NULL"); -+ bind(L); ++// Move a metadata address into a register. ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj == NULL) { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } else { ++ oop_index = oop_recorder()->find_index(obj); + } -+#endif /* ASSERT */ -+ // super call -+ MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp, -+ entry_point, number_of_arguments, -+ check_exceptions); -+// interpreter specific -+ restore_bcp(); -+ restore_locals(); ++ RelocationHolder rspec = metadata_Relocation::spec(oop_index); ++ mv(dst, Address((address)obj, rspec)); +} + -+void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) { -+ assert_different_registers(obj, tmp, t0, mdo_addr.base()); -+ Label update, next, none; ++// Writes to stack successive pages until offset reached to check for ++// stack overflow + shadow pages. This clobbers tmp. ++void MacroAssembler::bang_stack_size(Register size, Register tmp) { ++ assert_different_registers(tmp, size, t0); ++ // Bang stack for total size given plus shadow page size. ++ // Bang one page at a time because large size can bang beyond yellow and ++ // red zones. ++ mv(t0, os::vm_page_size()); ++ Label loop; ++ bind(loop); ++ sub(tmp, sp, t0); ++ subw(size, size, t0); ++ sd(size, Address(tmp)); ++ bgtz(size, loop); + -+ verify_oop(obj); ++ // Bang down shadow pages too. ++ // At this point, (tmp-0) is the last address touched, so don't ++ // touch it again. (It was touched as (tmp-pagesize) but then tmp ++ // was post-decremented.) Skip this address by starting at i=1, and ++ // touch a few more pages below. N.B. It is important to touch all ++ // the way down to and including i=StackShadowPages. 
++ for (int i = 0; i < (int)(JavaThread::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { ++ // this could be any sized move but this is can be a debugging crumb ++ // so the bigger the better. ++ sub(tmp, tmp, os::vm_page_size()); ++ sd(size, Address(tmp, 0)); ++ } ++} + -+ bnez(obj, update); -+ orptr(mdo_addr, TypeEntries::null_seen, t0, tmp); -+ j(next); ++SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { ++ assert_cond(masm != NULL); ++ int32_t offset = 0; ++ _masm = masm; ++ _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset); ++ _masm->lbu(t0, Address(t0, offset)); ++ _masm->beqz(t0, _label); ++} + -+ bind(update); -+ load_klass(obj, obj); ++SkipIfEqual::~SkipIfEqual() { ++ assert_cond(_masm != NULL); ++ _masm->bind(_label); ++ _masm = NULL; ++} + -+ ld(t0, mdo_addr); -+ xorr(obj, obj, t0); -+ andi(t0, obj, TypeEntries::type_klass_mask); -+ beqz(t0, next); // klass seen before, nothing to -+ // do. The unknown bit may have been -+ // set already but no need to check. ++void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) { ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld(dst, Address(xmethod, Method::const_offset())); ++ ld(dst, Address(dst, ConstMethod::constants_offset())); ++ ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes())); ++ ld(dst, Address(dst, mirror_offset)); ++ resolve_oop_handle(dst, tmp); ++} + -+ andi(t0, obj, TypeEntries::type_unknown); -+ bnez(t0, next); -+ // already unknown. Nothing to do anymore. ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. ++ assert_different_registers(result, tmp); ++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); ++} + -+ ld(t0, mdo_addr); -+ beqz(t0, none); -+ li(tmp, (u1)TypeEntries::null_seen); -+ beq(t0, tmp, none); -+ // There is a chance that the checks above (re-reading profiling -+ // data from memory) fail if another thread has just set the -+ // profiling to this obj's klass -+ ld(t0, mdo_addr); -+ xorr(obj, obj, t0); -+ andi(t0, obj, TypeEntries::type_klass_mask); -+ beqz(t0, next); ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, ++ Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} + -+ // different than before. Cannot keep accurate profile. -+ orptr(mdo_addr, TypeEntries::type_unknown, t0, tmp); -+ j(next); ++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any registers ++ // NOTE: this is plenty to provoke a segv ++ ld(zr, Address(reg, 0)); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} + -+ bind(none); -+ // first time here. Set profile type. 
-+ sd(obj, mdo_addr); ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, ++ Address dst, Register src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} + -+ bind(next); ++// Algorithm must match CompressedOops::encode. ++void MacroAssembler::encode_heap_oop(Register d, Register s) { ++ verify_oop(s, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli(d, s, LogMinObjAlignmentInBytes); ++ } else { ++ mv(d, s); ++ } ++ } else { ++ Label notNull; ++ sub(d, s, xheapbase); ++ bgez(d, notNull); ++ mv(d, zr); ++ bind(notNull); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli(d, d, Universe::narrow_oop_shift()); ++ } ++ } +} + -+void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { -+ if (!ProfileInterpreter) { -+ return; ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else { ++ ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); + } ++} + -+ if (MethodData::profile_arguments() || MethodData::profile_return()) { -+ Label profile_continue; ++void MacroAssembler::store_klass(Register dst, Register src) { ++ // FIXME: Should this be a store release? concurrent gcs assumes ++ // klass length is valid if klass field is not null. ++ if (UseCompressedClassPointers) { ++ encode_klass_not_null(src); ++ sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); ++ } else { ++ sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); ++ } ++} + -+ test_method_data_pointer(mdp, profile_continue); ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ // Store to klass gap in destination ++ sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); ++ } ++} + -+ int off_to_start = is_virtual ? 
in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++void MacroAssembler::decode_klass_not_null(Register r) { ++ decode_klass_not_null(r, r); ++} + -+ lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); -+ if (is_virtual) { -+ li(tmp, (u1)DataLayout::virtual_call_type_data_tag); -+ bne(t0, tmp, profile_continue); ++void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (Universe::narrow_klass_base() == NULL) { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ slli(dst, src, LogKlassAlignmentInBytes); + } else { -+ li(tmp, (u1)DataLayout::call_type_data_tag); -+ bne(t0, tmp, profile_continue); ++ mv(dst, src); + } ++ return; ++ } + -+ // calculate slot step -+ static int stack_slot_offset0 = in_bytes(TypeEntriesAtCall::stack_slot_offset(0)); -+ static int slot_step = in_bytes(TypeEntriesAtCall::stack_slot_offset(1)) - stack_slot_offset0; ++ Register xbase = dst; ++ if (dst == src) { ++ xbase = tmp; ++ } + -+ // calculate type step -+ static int argument_type_offset0 = in_bytes(TypeEntriesAtCall::argument_type_offset(0)); -+ static int type_step = in_bytes(TypeEntriesAtCall::argument_type_offset(1)) - argument_type_offset0; ++ assert_different_registers(src, xbase); ++ li(xbase, (uintptr_t)Universe::narrow_klass_base()); + -+ if (MethodData::profile_arguments()) { -+ Label done, loop, loopEnd, profileArgument, profileReturnType; -+ RegSet pushed_registers; -+ pushed_registers += x15; -+ pushed_registers += x16; -+ pushed_registers += x17; -+ Register mdo_addr = x15; -+ Register index = x16; -+ Register off_to_args = x17; -+ push_reg(pushed_registers, sp); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert_different_registers(t0, xbase); ++ shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); ++ } else { ++ add(dst, xbase, src); ++ } + -+ mv(off_to_args, in_bytes(TypeEntriesAtCall::args_data_offset())); -+ mv(t0, TypeProfileArgsLimit); -+ beqz(t0, loopEnd); ++ if (xbase == xheapbase) { reinit_heapbase(); } ++} + -+ mv(index, zr); // index < TypeProfileArgsLimit -+ bind(loop); -+ bgtz(index, profileReturnType); -+ li(t0, (int)MethodData::profile_return()); -+ beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false -+ bind(profileReturnType); -+ // If return value type is profiled we may have no argument to profile -+ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); -+ mv(t1, - TypeStackSlotEntries::per_arg_count()); -+ mul(t1, index, t1); -+ add(tmp, tmp, t1); -+ li(t1, TypeStackSlotEntries::per_arg_count()); -+ add(t0, mdp, off_to_args); -+ blt(tmp, t1, done); -+ -+ bind(profileArgument); -+ -+ ld(tmp, Address(callee, Method::const_offset())); -+ load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); -+ // stack offset o (zero based) from the start of the argument -+ // list, for n arguments translates into offset n - o - 1 from -+ // the end of the argument list -+ li(t0, stack_slot_offset0); -+ li(t1, slot_step); -+ mul(t1, index, t1); -+ add(t0, t0, t1); -+ add(t0, mdp, t0); -+ ld(t0, Address(t0)); -+ sub(tmp, tmp, t0); -+ addi(tmp, tmp, -1); -+ Address arg_addr = argument_address(tmp); -+ ld(tmp, arg_addr); -+ -+ li(t0, 
argument_type_offset0); -+ li(t1, type_step); -+ mul(t1, index, t1); -+ add(t0, t0, t1); -+ add(mdo_addr, mdp, t0); -+ Address mdo_arg_addr(mdo_addr, 0); -+ profile_obj_type(tmp, mdo_arg_addr, t1); -+ -+ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); -+ addi(off_to_args, off_to_args, to_add); -+ -+ // increment index by 1 -+ addi(index, index, 1); -+ li(t1, TypeProfileArgsLimit); -+ blt(index, t1, loop); -+ bind(loopEnd); -+ -+ if (MethodData::profile_return()) { -+ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); -+ addi(tmp, tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); -+ } -+ -+ add(t0, mdp, off_to_args); -+ bind(done); -+ mv(mdp, t0); ++void MacroAssembler::encode_klass_not_null(Register r) { ++ encode_klass_not_null(r, r); ++} + -+ // unspill the clobbered registers -+ pop_reg(pushed_registers, sp); ++void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); + -+ if (MethodData::profile_return()) { -+ // We're right after the type profile for the last -+ // argument. tmp is the number of cells left in the -+ // CallTypeData/VirtualCallTypeData to reach its end. Non null -+ // if there's a return to profile. -+ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); -+ shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size)); -+ } -+ sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ if (Universe::narrow_klass_base() == NULL) { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ srli(dst, src, LogKlassAlignmentInBytes); + } else { -+ assert(MethodData::profile_return(), "either profile call args or call ret"); -+ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ mv(dst, src); + } ++ return; ++ } + -+ // mdp points right after the end of the -+ // CallTypeData/VirtualCallTypeData, right after the cells for the -+ // return value type if there's one -+ -+ bind(profile_continue); ++ if (((uint64_t)(uintptr_t)Universe::narrow_klass_base() & 0xffffffff) == 0 && ++ Universe::narrow_klass_shift() == 0) { ++ zero_extend(dst, src, 32); ++ return; + } -+} + -+void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { -+ assert_different_registers(mdp, ret, tmp, xbcp, t0, t1); -+ if (ProfileInterpreter && MethodData::profile_return()) { -+ Label profile_continue, done; ++ Register xbase = dst; ++ if (dst == src) { ++ xbase = tmp; ++ } + -+ test_method_data_pointer(mdp, profile_continue); ++ assert_different_registers(src, xbase); ++ li(xbase, (intptr_t)Universe::narrow_klass_base()); ++ sub(dst, src, xbase); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ srli(dst, dst, LogKlassAlignmentInBytes); ++ } ++ if (xbase == xheapbase) { ++ reinit_heapbase(); ++ } ++} + -+ if (MethodData::profile_return_jsr292_only()) { -+ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ decode_heap_oop_not_null(r, r); ++} + -+ // If we don't profile all invoke bytecodes we must make sure -+ // it's a bytecode we indeed profile. 
We can't go back to the -+ // begining of the ProfileData we intend to update to check its -+ // type because we're right after it and we don't known its -+ // length -+ Label do_profile; -+ lbu(t0, Address(xbcp, 0)); -+ li(tmp, (u1)Bytecodes::_invokedynamic); -+ beq(t0, tmp, do_profile); -+ li(tmp, (u1)Bytecodes::_invokehandle); -+ beq(t0, tmp, do_profile); -+ get_method(tmp); -+ lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); -+ li(t1, static_cast(vmIntrinsics::_compiledLambdaForm)); -+ bne(t0, t1, profile_continue); -+ bind(do_profile); ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ slli(dst, src, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ add(dst, xheapbase, dst); + } -+ -+ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); -+ mv(tmp, ret); -+ profile_obj_type(tmp, mdo_ret_addr, t1); -+ -+ bind(profile_continue); ++ } else { ++ assert(Universe::narrow_oop_base() == NULL, "sanity"); ++ mv(dst, src); + } +} + -+void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3) { -+ assert_different_registers(t0, t1, mdp, tmp1, tmp2, tmp3); -+ if (ProfileInterpreter && MethodData::profile_parameters()) { -+ Label profile_continue, done; -+ -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // Load the offset of the area within the MDO used for -+ // parameters. If it's negative we're not profiling any parameters -+ lwu(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()))); -+ srli(tmp2, tmp1, 31); -+ bnez(tmp2, profile_continue); // i.e. sign bit set -+ -+ // Compute a pointer to the area for parameters from the offset -+ // and move the pointer to the slot for the last -+ // parameters. Collect profiling from last parameter down. 
-+ // mdo start + parameters offset + array length - 1 -+ add(mdp, mdp, tmp1); -+ ld(tmp1, Address(mdp, ArrayData::array_len_offset())); -+ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); ++void MacroAssembler::decode_heap_oop(Register d, Register s) { ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0 || d != s) { ++ slli(d, s, Universe::narrow_oop_shift()); ++ } ++ } else { ++ Label done; ++ mv(d, s); ++ beqz(s, done); ++ shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); ++ bind(done); ++ } ++ verify_oop(d, "broken oop in decode_heap_oop"); ++} + -+ Label loop; -+ bind(loop); ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} + -+ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); -+ int type_base = in_bytes(ParametersTypeData::type_offset(0)); -+ int per_arg_scale = exact_log2(DataLayout::cell_size); -+ add(t0, mdp, off_base); -+ add(t1, mdp, type_base); ++void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} + -+ shadd(tmp2, tmp1, t0, tmp2, per_arg_scale); -+ // load offset on the stack from the slot for this parameter -+ ld(tmp2, Address(tmp2, 0)); -+ neg(tmp2, tmp2); ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, thread_tmp); ++} + -+ // read the parameter from the local area -+ shadd(tmp2, tmp2, xlocals, tmp2, Interpreter::logStackElementSize); -+ ld(tmp2, Address(tmp2, 0)); ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} + -+ // profile the parameter -+ shadd(t1, tmp1, t1, t0, per_arg_scale); -+ Address arg_type(t1, 0); -+ profile_obj_type(tmp2, arg_type, tmp3); ++int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, ++ bool want_remainder) ++{ ++ // Full implementation of Java idiv and irem. The function ++ // returns the (pc) offset of the div instruction - may be needed ++ // for implicit exceptions. ++ // ++ // input : rs1: dividend ++ // rs2: divisor ++ // ++ // result: either ++ // quotient (= rs1 idiv rs2) ++ // remainder (= rs1 irem rs2) + -+ // go to next parameter -+ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); -+ bgez(tmp1, loop); + -+ bind(profile_continue); ++ int idivl_offset = offset(); ++ if (!want_remainder) { ++ divw(result, rs1, rs2); ++ } else { ++ remw(result, rs1, rs2); // result = rs1 % rs2; + } ++ return idivl_offset; +} + -+void InterpreterMacroAssembler::get_method_counters(Register method, -+ Register mcs, Label& skip) { -+ Label has_counters; -+ ld(mcs, Address(method, Method::method_counters_offset())); -+ bnez(mcs, has_counters); -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::build_method_counters), method); -+ ld(mcs, Address(method, Method::method_counters_offset())); -+ beqz(mcs, skip); // No MethodCounters allocated, OutOfMemory -+ bind(has_counters); -+} ++int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, ++ bool want_remainder) ++{ ++ // Full implementation of Java ldiv and lrem. 
The function ++ // returns the (pc) offset of the div instruction - may be needed ++ // for implicit exceptions. ++ // ++ // input : rs1: dividend ++ // rs2: divisor ++ // ++ // result: either ++ // quotient (= rs1 idiv rs2) ++ // remainder (= rs1 irem rs2) + -+#ifdef ASSERT -+void InterpreterMacroAssembler::verify_access_flags(Register access_flags, uint32_t flag_bits, -+ const char* msg, bool stop_by_hit) { -+ Label L; -+ andi(t0, access_flags, flag_bits); -+ if (stop_by_hit) { -+ beqz(t0, L); ++ int idivq_offset = offset(); ++ if (!want_remainder) { ++ div(result, rs1, rs2); + } else { -+ bnez(t0, L); ++ rem(result, rs1, rs2); // result = rs1 % rs2; + } -+ stop(msg); -+ bind(L); ++ return idivq_offset; +} + -+void InterpreterMacroAssembler::verify_frame_setup() { -+ Label L; -+ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ ld(t0, monitor_block_top); -+ beq(esp, t0, L); -+ stop("broken stack frame setup in interpreter"); -+ bind(L); -+} -+#endif -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -new file mode 100644 -index 00000000000..4d8cb086f82 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -@@ -0,0 +1,285 @@ -+/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++// Look up the method for a megamorpic invkkeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. 
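++// (Clarifying sketch, illustrative only: the target is selected by the interface
++// klass passed in intf_klass and the itable slot given by itable_index; the scan
++// below walks the receiver's itableOffsetEntry list, roughly
++//   for (entry = itable_start; entry->interface() != NULL; entry++)
++//     if (entry->interface() == intf_klass) goto found;
++// and branches to L_no_such_interface when a NULL entry is reached.)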
++void MacroAssembler::lookup_interface_method(Register recv_klass,
++                                             Register intf_klass,
++                                             RegisterOrConstant itable_index,
++                                             Register method_result,
++                                             Register scan_tmp,
++                                             Label& L_no_such_interface,
++                                             bool return_method) {
++  assert_different_registers(recv_klass, intf_klass, scan_tmp);
++  assert_different_registers(method_result, intf_klass, scan_tmp);
++  assert(recv_klass != method_result || !return_method,
++         "recv_klass can be destroyed when method isn't needed");
++  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
++         "caller must use same register for non-constant itable index as for method");
+
++  // Compute start of first itableOffsetEntry (which is at the end of the vtable).
++  int vtable_base = in_bytes(Klass::vtable_start_offset());
++  int itentry_off = itableMethodEntry::method_offset_in_bytes();
++  int scan_step = itableOffsetEntry::size() * wordSize;
++  int vte_size = vtableEntry::size_in_bytes();
++  assert(vte_size == wordSize, "else adjust times_vte_scale");
+
++  lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset()));
+
++  // %%% Could store the aligned, prescaled offset in the klassoop.
++  shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3);
++  add(scan_tmp, scan_tmp, vtable_base);
+
++  if (return_method) {
++    // Adjust recv_klass by scaled itable_index, so we can free itable_index.
++    assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
++    if (itable_index.is_register()) {
++      slli(t0, itable_index.as_register(), 3);
++    } else {
++      li(t0, itable_index.as_constant() << 3);
++    }
++    add(recv_klass, recv_klass, t0);
++    if (itentry_off) {
++      add(recv_klass, recv_klass, itentry_off);
++    }
++  }
+
++  Label search, found_method;
+
++  ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
++  beq(intf_klass, method_result, found_method);
++  bind(search);
++  // Check that the previous entry is non-null. A null entry means that
++  // the receiver class doesn't implement the interface, and wasn't the
++  // same as when the caller was compiled.
++  beqz(method_result, L_no_such_interface, /* is_far */ true);
++  addi(scan_tmp, scan_tmp, scan_step);
++  ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
++  bne(intf_klass, method_result, search);
+
++  bind(found_method);
+
++  // Got a hit.
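++  // (Illustrative note: when return_method is set, scan_tmp now points at the
++  // matching itableOffsetEntry; its offset field, added to the already-adjusted
++  // recv_klass, locates the Method* entry loaded below.)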
++  if (return_method) {
++    lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes()));
++    add(method_result, recv_klass, scan_tmp);
++    ld(method_result, Address(method_result));
++  }
++}
+
++// virtual method calling
++void MacroAssembler::lookup_virtual_method(Register recv_klass,
++                                           RegisterOrConstant vtable_index,
++                                           Register method_result) {
++  const int base = in_bytes(Klass::vtable_start_offset());
++  assert(vtableEntry::size() * wordSize == 8,
++         "adjust the scaling in the code below");
++  int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes();
+
++  if (vtable_index.is_register()) {
++    shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord);
++    ld(method_result, Address(method_result, vtable_offset_in_bytes));
++  } else {
++    vtable_offset_in_bytes += vtable_index.as_constant() * wordSize;
++    ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes));
++  }
++}
+
++void MacroAssembler::membar(uint32_t order_constraint) {
++  address prev = pc() - NativeMembar::instruction_size;
++  address last = code()->last_insn();
+
++  if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) {
++    NativeMembar *bar = NativeMembar_at(prev);
++    // We are merging two memory barrier instructions. On RISCV we
++    // can do this simply by ORing them together.
++    bar->set_kind(bar->get_kind() | order_constraint);
++    BLOCK_COMMENT("merged membar");
++  } else {
++    code()->set_last_insn(pc());
+
++    uint32_t predecessor = 0;
++    uint32_t successor = 0;
+
++    membar_mask_to_pred_succ(order_constraint, predecessor, successor);
++    fence(predecessor, successor);
+  }
++}
+
++// Form an address from base + offset in Rd. Rd may or may not
++// actually be used: you must use the Address that is returned. It
++// is up to you to ensure that the shift provided matches the size
++// of your data.
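++// (Illustrative usage: form_address(t1, x15, 8) can simply return Address(x15, 8)
++// because 8 fits in a signed 12-bit immediate, while form_address(t1, x15, 0x12345)
++// materializes x15 + 0x12345 into t1 and returns Address(t1).)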
++Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) { ++ if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12 ++ return Address(base, byte_offset); + } + -+ void restore_constant_pool_cache() { -+ ld(xcpool, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); -+ } ++ // Do it the hard way ++ mv(Rd, byte_offset); ++ add(Rd, base, Rd); ++ return Address(Rd); ++} + -+ void get_dispatch(); ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label& L_success) { ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} + -+ // Helpers for runtime call arguments/results -+ void get_method(Register reg) { -+ ld(reg, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++void MacroAssembler::safepoint_poll(Label& slow_path) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ andi(t0, t1, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path); ++ } else { ++ int32_t offset = 0; ++ la_patchable(t0, ExternalAddress(SafepointSynchronize::address_of_state()), offset); ++ lwu(t0, Address(t0, offset)); ++ assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); ++ bnez(t0, slow_path); + } ++} + -+ void get_const(Register reg) { -+ get_method(reg); -+ ld(reg, Address(reg, in_bytes(Method::const_offset()))); -+ } ++void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, ++ Label &succeed, Label *fail) { ++ // oldv holds comparison value ++ // newv holds value to write in exchange ++ // addr identifies memory word to compare against/update ++ Label retry_load, nope; ++ bind(retry_load); ++ // Load reserved from the memory location ++ lr_d(tmp, addr, Assembler::aqrl); ++ // Fail and exit if it is not what we expect ++ bne(tmp, oldv, nope); ++ // If the store conditional succeeds, tmp will be zero ++ sc_d(tmp, newv, addr, Assembler::rl); ++ beqz(tmp, succeed); ++ // Retry only when the store conditional failed ++ j(retry_load); + -+ void get_constant_pool(Register reg) { -+ get_const(reg); -+ ld(reg, Address(reg, in_bytes(ConstMethod::constants_offset()))); ++ bind(nope); ++ membar(AnyAny); ++ mv(oldv, tmp); ++ if (fail != NULL) { ++ j(*fail); + } ++} + -+ void get_constant_pool_cache(Register reg) { -+ get_constant_pool(reg); -+ ld(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); -+ } ++void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, ++ Label &succeed, Label *fail) { ++ assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); ++ cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); ++} + -+ void get_cpool_and_tags(Register cpool, Register tags) { -+ get_constant_pool(cpool); -+ ld(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); ++void MacroAssembler::load_reserved(Register addr, ++ enum operand_size size, ++ Assembler::Aqrl acquire) { ++ switch (size) { ++ case int64: ++ lr_d(t0, addr, acquire); ++ break; ++ case int32: ++ lr_w(t0, addr, acquire); ++ break; ++ case uint32: ++ lr_w(t0, addr, acquire); ++ zero_extend(t0, t0, 32); ++ break; ++ default: ++ ShouldNotReachHere(); + } ++} + -+ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); -+ void get_cache_and_index_at_bcp(Register cache, Register 
index, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_method_counters(Register method, Register mcs, Label& skip); ++void MacroAssembler::store_conditional(Register addr, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl release) { ++ switch (size) { ++ case int64: ++ sc_d(t0, new_val, addr, release); ++ break; ++ case int32: ++ case uint32: ++ sc_w(t0, new_val, addr, release); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} + -+ // Load cpool->resolved_references(index). -+ void load_resolved_reference_at_index(Register result, Register index, Register tmp = x15); + -+ // Load cpool->resolved_klass_at(index). -+ void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); ++void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Register tmp1, Register tmp2, Register tmp3) { ++ assert(size == int8 || size == int16, "unsupported operand size"); + -+ void load_resolved_method_at_index(int byte_no, Register method, Register cache); ++ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; + -+ void pop_ptr(Register r = x10); -+ void pop_i(Register r = x10); -+ void pop_l(Register r = x10); -+ void pop_f(FloatRegister r = f10); -+ void pop_d(FloatRegister r = f10); -+ void push_ptr(Register r = x10); -+ void push_i(Register r = x10); -+ void push_l(Register r = x10); -+ void push_f(FloatRegister r = f10); -+ void push_d(FloatRegister r = f10); ++ andi(shift, addr, 3); ++ slli(shift, shift, 3); + -+ void pop(TosState state); // transition vtos -> state -+ void push(TosState state); // transition state -> vtos ++ andi(aligned_addr, addr, ~3); + -+ void empty_expression_stack() { -+ ld(esp, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); -+ // NULL last_sp until next java call -+ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ if (size == int8) { ++ addi(mask, zr, 0xff); ++ } else { ++ // size == int16 case ++ addi(mask, zr, -1); ++ zero_extend(mask, mask, 16); + } ++ sll(mask, mask, shift); + -+ // Helpers for swap and dup -+ void load_ptr(int n, Register val); -+ void store_ptr(int n, Register val); -+ -+ // Load float value from 'address'. The value is loaded onto the FPU register v0. -+ void load_float(Address src); -+ void load_double(Address src); -+ -+ // Generate a subtype check: branch to ok_is_subtype if sub_klass is -+ // a subtype of super_klass. 
-+ void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); -+ -+ // Dispatching -+ void dispatch_prolog(TosState state, int step = 0); -+ void dispatch_epilog(TosState state, int step = 0); -+ // dispatch via t0 -+ void dispatch_only(TosState state, bool generate_poll = false, Register Rs = t0); -+ // dispatch normal table via t0 (assume t0 is loaded already) -+ void dispatch_only_normal(TosState state, Register Rs = t0); -+ void dispatch_only_noverify(TosState state, Register Rs = t0); -+ // load t0 from [xbcp + step] and dispatch via t0 -+ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); -+ // load t0 from [xbcp] and dispatch via t0 and table -+ void dispatch_via (TosState state, address* table); -+ -+ // jump to an invoked target -+ void prepare_to_jump_from_interpreted(); -+ void jump_from_interpreted(Register method); -+ ++ xori(not_mask, mask, -1); + -+ // Returning from interpreted functions -+ // -+ // Removes the current activation (incl. unlocking of monitors) -+ // and sets up the return address. This code is also used for -+ // exception unwindwing. In that case, we do not want to throw -+ // IllegalMonitorStateExceptions, since that might get us into an -+ // infinite rethrow exception loop. -+ // Additionally this code is used for popFrame and earlyReturn. -+ // In popFrame case we want to skip throwing an exception, -+ // installing an exception, and notifying jvmdi. -+ // In earlyReturn case we only want to skip throwing an exception -+ // and installing an exception. -+ void remove_activation(TosState state, -+ bool throw_monitor_exception = true, -+ bool install_monitor_exception = true, -+ bool notify_jvmdi = true); ++ sll(expected, expected, shift); ++ andr(expected, expected, mask); + -+ // FIXME: Give us a valid frame at a null check. -+ virtual void null_check(Register reg, int offset = -1) { -+ MacroAssembler::null_check(reg, offset); -+ } ++ sll(new_val, new_val, shift); ++ andr(new_val, new_val, mask); ++} + -+ // Object locking -+ void lock_object (Register lock_reg); -+ void unlock_object(Register lock_reg); ++// cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps. ++// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w, ++// which are forced to work with 4-byte aligned address. 
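++// (Sketch of the approach, for illustration: the word containing the byte/short is
++// loaded with lr.w from addr & ~3, the operand is isolated with a mask shifted by
++// (addr & 3) * 8 bit positions, compared against the pre-shifted 'expected', and the
++// merged word is written back with sc.w; see cmpxchg_narrow_value_helper above.)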
++void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool, ++ Register tmp1, Register tmp2, Register tmp3) { ++ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; ++ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); ++ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); + -+ // Interpreter profiling operations -+ void set_method_data_pointer_for_bcp(); -+ void test_method_data_pointer(Register mdp, Label& zero_continue); -+ void verify_method_data_pointer(); ++ Label retry, fail, done; + -+ void set_mdp_data_at(Register mdp_in, int constant, Register value); -+ void increment_mdp_data_at(Address data, bool decrement = false); -+ void increment_mdp_data_at(Register mdp_in, int constant, -+ bool decrement = false); -+ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, -+ bool decrement = false); -+ void increment_mask_and_jump(Address counter_addr, -+ int increment, Address mask, -+ Register tmp1, Register tmp2, -+ bool preloaded, Label* where); ++ bind(retry); ++ lr_w(old, aligned_addr, acquire); ++ andr(tmp, old, mask); ++ bne(tmp, expected, fail); + -+ void set_mdp_flag_at(Register mdp_in, int flag_constant); -+ void test_mdp_data_at(Register mdp_in, int offset, Register value, -+ Register test_value_out, -+ Label& not_equal_continue); ++ andr(tmp, old, not_mask); ++ orr(tmp, tmp, new_val); ++ sc_w(tmp, tmp, aligned_addr, release); ++ bnez(tmp, retry); + -+ void record_klass_in_profile(Register receiver, Register mdp, -+ Register reg2, bool is_virtual_call); -+ void record_klass_in_profile_helper(Register receiver, Register mdp, -+ Register reg2, -+ Label& done, bool is_virtual_call); -+ void record_item_in_profile_helper(Register item, Register mdp, -+ Register reg2, int start_row, Label& done, int total_rows, -+ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, -+ int non_profiled_offset); ++ if (result_as_bool) { ++ addi(result, zr, 1); ++ j(done); + -+ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); -+ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); -+ void update_mdp_by_constant(Register mdp_in, int constant); -+ void update_mdp_for_ret(Register return_bci); ++ bind(fail); ++ mv(result, zr); + -+ // narrow int return value -+ void narrow(Register result); ++ bind(done); ++ } else { ++ andr(tmp, old, mask); + -+ void profile_taken_branch(Register mdp, Register bumped_count); -+ void profile_not_taken_branch(Register mdp); -+ void profile_call(Register mdp); -+ void profile_final_call(Register mdp); -+ void profile_virtual_call(Register receiver, Register mdp, -+ Register t1, -+ bool receiver_can_be_null = false); -+ void profile_ret(Register return_bci, Register mdp); -+ void profile_null_seen(Register mdp); -+ void profile_typecheck(Register mdp, Register klass, Register temp); -+ void profile_typecheck_failed(Register mdp); -+ void profile_switch_default(Register mdp); -+ void profile_switch_case(Register index_in_scratch, Register mdp, -+ Register temp); ++ bind(fail); ++ srl(result, tmp, shift); + -+ void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp); -+ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); -+ void profile_return_type(Register mdp, Register ret, Register 
tmp); -+ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3); ++ if (size == int8) { ++ sign_extend(result, result, 8); ++ } else { ++ // size == int16 case ++ sign_extend(result, result, 16); ++ } ++ } ++} + -+ // Debugging -+ // only if +VerifyFPU && (state == ftos || state == dtos) -+ void verify_FPU(int stack_depth, TosState state = ftos); ++// weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement ++// the weak CAS stuff. The major difference is that it just failed when store conditional ++// failed. ++void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, ++ Register tmp1, Register tmp2, Register tmp3) { ++ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; ++ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); ++ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); + -+ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ Label succ, fail, done; + -+ // support for jvmti/dtrace -+ void notify_method_entry(); -+ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++ lr_w(old, aligned_addr, acquire); ++ andr(tmp, old, mask); ++ bne(tmp, expected, fail); + -+ virtual void _call_Unimplemented(address call_site) { -+ save_bcp(); -+ set_last_Java_frame(esp, fp, (address) pc(), t0); -+ MacroAssembler::_call_Unimplemented(call_site); -+ } ++ andr(tmp, old, not_mask); ++ orr(tmp, tmp, new_val); ++ sc_w(tmp, tmp, aligned_addr, release); ++ beqz(tmp, succ); + -+#ifdef ASSERT -+ void verify_access_flags(Register access_flags, uint32_t flag_bits, -+ const char* msg, bool stop_by_hit = true); -+ void verify_frame_setup(); -+#endif -+}; ++ bind(fail); ++ addi(result, zr, 1); ++ j(done); + -+#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -new file mode 100644 -index 00000000000..d93530d8564 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -@@ -0,0 +1,295 @@ -+/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ bind(succ); ++ mv(result, zr); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "memory/allocation.inline.hpp" -+#include "memory/universe.hpp" -+#include "oops/method.hpp" -+#include "oops/oop.inline.hpp" -+#include "runtime/handles.inline.hpp" -+#include "runtime/icache.hpp" -+#include "runtime/interfaceSupport.inline.hpp" -+#include "runtime/signature.hpp" ++ bind(done); ++} + -+#define __ _masm-> ++void MacroAssembler::cmpxchg(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool) { ++ assert(size != int8 && size != int16, "unsupported operand size"); + -+// Implementation of SignatureHandlerGenerator -+Register InterpreterRuntime::SignatureHandlerGenerator::from() { return xlocals; } -+Register InterpreterRuntime::SignatureHandlerGenerator::to() { return sp; } -+Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return t0; } ++ Label retry_load, done, ne_done; ++ bind(retry_load); ++ load_reserved(addr, size, acquire); ++ bne(t0, expected, ne_done); ++ store_conditional(addr, new_val, size, release); ++ bnez(t0, retry_load); + -+Register InterpreterRuntime::SignatureHandlerGenerator::next_gpr() { -+ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { -+ return g_INTArgReg[++_num_reg_int_args]; ++ // equal, succeed ++ if (result_as_bool) { ++ li(result, 1); ++ } else { ++ mv(result, expected); + } -+ return noreg; -+} ++ j(done); + -+FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() { -+ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { -+ return g_FPArgReg[_num_reg_fp_args++]; ++ // not equal, failed ++ bind(ne_done); ++ if (result_as_bool) { ++ mv(result, zr); + } else { -+ return fnoreg; ++ mv(result, t0); + } -+} + -+int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() { -+ int ret = _stack_offset; -+ _stack_offset += wordSize; -+ return ret; ++ bind(done); +} + -+InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( -+ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { -+ _masm = new MacroAssembler(buffer); // allocate on resourse area by default -+ _num_reg_int_args = (method->is_static() ? 
1 : 0); -+ _num_reg_fp_args = 0; -+ _stack_offset = 0; -+} ++void MacroAssembler::cmpxchg_weak(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result) { ++ Label fail, done, sc_done; ++ load_reserved(addr, size, acquire); ++ bne(t0, expected, fail); ++ store_conditional(addr, new_val, size, release); ++ beqz(t0, sc_done); + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); ++ // fail ++ bind(fail); ++ li(result, 1); ++ j(done); + -+ Register reg = next_gpr(); -+ if (reg != noreg) { -+ __ lw(reg, src); -+ } else { -+ __ lw(x10, src); -+ __ sw(x10, Address(to(), next_stack_offset())); -+ } ++ // sc_done ++ bind(sc_done); ++ mv(result, 0); ++ bind(done); +} + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++#define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ ++void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ ++ prev = prev->is_valid() ? prev : zr; \ ++ if (incr.is_register()) { \ ++ AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ ++ } else { \ ++ mv(t0, incr.as_constant()); \ ++ AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ ++ } \ ++ return; \ ++} + -+ Register reg = next_gpr(); -+ if (reg != noreg) { -+ __ ld(reg, src); -+ } else { -+ __ ld(x10, src); -+ __ sd(x10, Address(to(), next_stack_offset())); -+ } ++ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) ++ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) ++ ++#undef ATOMIC_OP ++ ++#define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ ++void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ ++ prev = prev->is_valid() ? 
prev : zr; \ ++ AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ ++ return; \ +} + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); ++ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) ++ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) + -+ FloatRegister reg = next_fpr(); -+ if (reg != fnoreg) { -+ __ flw(reg, src); -+ } else { -+ // a floating-point argument is passed according to the integer calling -+ // convention if no floating-point argument register available -+ pass_int(); -+ } ++#undef ATOMIC_XCHG ++ ++#define ATOMIC_XCHGU(OP1, OP2) \ ++void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ ++ atomic_##OP2(prev, newv, addr); \ ++ zero_extend(prev, prev, 32); \ ++ return; \ +} + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ATOMIC_XCHGU(xchgwu, xchgw) ++ATOMIC_XCHGU(xchgalwu, xchgalw) + -+ FloatRegister reg = next_fpr(); -+ if (reg != fnoreg) { -+ __ fld(reg, src); ++#undef ATOMIC_XCHGU ++ ++void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(entry.target()) != NULL, ++ "destination of far call not found in code cache"); ++ int32_t offset = 0; ++ if (far_branches()) { ++ // We can use auipc + jalr here because we know that the total size of ++ // the code cache cannot exceed 2Gb. ++ la_patchable(tmp, entry, offset); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ jalr(x0, tmp, offset); + } else { -+ // a floating-point argument is passed according to the integer calling -+ // convention if no floating-point argument register available -+ pass_long(); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ j(entry); + } +} + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { -+ Register reg = next_gpr(); -+ if (reg == c_rarg1) { -+ assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); -+ __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); -+ } else if (reg != noreg) { -+ // c_rarg2-c_rarg7 -+ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); -+ __ mv(reg, zr); //_num_reg_int_args:c_rarg -> 1:c_rarg2, 2:c_rarg3... -+ __ ld(temp(), x10); -+ Label L; -+ __ beqz(temp(), L); -+ __ mv(reg, x10); -+ __ bind(L); ++void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(entry.target()) != NULL, ++ "destination of far call not found in code cache"); ++ int32_t offset = 0; ++ if (far_branches()) { ++ // We can use auipc + jalr here because we know that the total size of ++ // the code cache cannot exceed 2Gb. 
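++    // (Illustrative note: auipc materializes the PC-relative upper 20 bits and
++    // jalr adds a signed 12-bit displacement, so the pair reaches roughly
++    // +/-2 GiB from the call site.)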
++ la_patchable(tmp, entry, offset); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ jalr(x1, tmp, offset); // link + } else { -+ //to stack -+ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); -+ __ ld(temp(), x10); -+ Label L; -+ __ bnez(temp(), L); -+ __ mv(x10, zr); -+ __ bind(L); -+ assert(sizeof(jobject) == wordSize, ""); -+ __ sd(x10, Address(to(), next_stack_offset())); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ jal(entry); // link + } +} + -+void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { -+ // generate code to handle arguments -+ iterate(fingerprint); -+ -+ // return result handler -+ __ la(x10, ExternalAddress(Interpreter::result_handler(method()->result_type()))); -+ __ ret(); ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ Register super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, tmp_reg); ++ bool must_load_sco = (super_check_offset == noreg); ++ if (must_load_sco) { ++ assert(tmp_reg != noreg, "supply either a temp or a register offset"); ++ } else { ++ assert_different_registers(sub_klass, super_klass, super_check_offset); ++ } + -+ __ flush(); -+} ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in batch"); + ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ Address super_check_offset_addr(super_klass, sco_offset); + -+// Implementation of SignatureHandlerLibrary ++ // Hacked jmp, which may only be used just before L_fallthrough. ++#define final_jmp(label) \ ++ if (&(label) == &L_fallthrough) { /*do nothing*/ } \ ++ else j(label) /*omit semi*/ + -+void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front fo the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); + ++ // Check the supertype display: ++ if (must_load_sco) { ++ lwu(tmp_reg, super_check_offset_addr); ++ super_check_offset = tmp_reg; ++ } ++ add(t0, sub_klass, super_check_offset); ++ Address super_check_addr(t0); ++ ld(t0, super_check_addr); // load displayed supertype + -+class SlowSignatureHandler -+ : public NativeSignatureIterator { -+ private: -+ address _from; -+ intptr_t* _to; -+ intptr_t* _int_args; -+ intptr_t* _fp_args; -+ intptr_t* _fp_identifiers; -+ unsigned int _num_reg_int_args; -+ unsigned int _num_reg_fp_args; ++ // Ths check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_Cache and the primary super dispaly elements. 
++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). + -+ intptr_t* single_slot_addr() { -+ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); -+ _from -= Interpreter::stackElementSize; -+ return from_addr; ++ beq(super_klass, t0, *L_success); ++ mv(t1, sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(super_check_offset, t1, *L_slow_path); ++ } else { ++ bne(super_check_offset, t1, *L_failure, /* is_far */ true); ++ final_jmp(*L_slow_path); + } + -+ intptr_t* double_slot_addr() { -+ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); -+ _from -= 2 * Interpreter::stackElementSize; -+ return from_addr; -+ } ++ bind(L_fallthrough); + -+ int pass_gpr(intptr_t value) { -+ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { -+ *_int_args++ = value; -+ return _num_reg_int_args++; -+ } -+ return -1; -+ } ++#undef final_jmp ++} + -+ int pass_fpr(intptr_t value) { -+ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { -+ *_fp_args++ = value; -+ return _num_reg_fp_args++; -+ } -+ return -1; -+ } ++// Scans count pointer sized words at [addr] for occurence of value, ++// generic ++void MacroAssembler::repne_scan(Register addr, Register value, Register count, ++ Register tmp) { ++ Label Lloop, Lexit; ++ beqz(count, Lexit); ++ bind(Lloop); ++ ld(tmp, addr); ++ beq(value, tmp, Lexit); ++ add(addr, addr, wordSize); ++ sub(count, count, 1); ++ bnez(count, Lloop); ++ bind(Lexit); ++} + -+ void pass_stack(intptr_t value) { -+ *_to++ = value; ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register tmp1_reg, ++ Register tmp2_reg, ++ Label* L_success, ++ Label* L_failure) { ++ assert_different_registers(sub_klass, super_klass, tmp1_reg); ++ if (tmp2_reg != noreg) { ++ assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); + } ++#define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) + -+ virtual void pass_int() { -+ jint value = *(jint*)single_slot_addr(); -+ if (pass_gpr(value) < 0) { -+ pass_stack(value); -+ } -+ } ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + -+ virtual void pass_long() { -+ intptr_t value = *double_slot_addr(); -+ if (pass_gpr(value) < 0) { -+ pass_stack(value); -+ } -+ } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); + -+ virtual void pass_object() { -+ intptr_t* addr = single_slot_addr(); -+ intptr_t value = *addr == 0 ? 
NULL : (intptr_t)addr; -+ if (pass_gpr(value) < 0) { -+ pass_stack(value); -+ } -+ } ++ // A couple of usefule fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); + -+ virtual void pass_float() { -+ jint value = *(jint*) single_slot_addr(); -+ // a floating-point argument is passed according to the integer calling -+ // convention if no floating-point argument register available -+ if (pass_fpr(value) < 0 && pass_gpr(value) < 0) { -+ pass_stack(value); -+ } ++ BLOCK_COMMENT("check_klass_subtype_slow_path"); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connecitons with the input regs. ++ ++ assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) ++ assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) ++ ++ RegSet pushed_registers; ++ if (!IS_A_TEMP(x12)) { ++ pushed_registers += x12; ++ } ++ if (!IS_A_TEMP(x15)) { ++ pushed_registers += x15; + } + -+ virtual void pass_double() { -+ intptr_t value = *double_slot_addr(); -+ int arg = pass_fpr(value); -+ if (0 <= arg) { -+ *_fp_identifiers |= (1ull << arg); // mark as double -+ } else if (pass_gpr(value) < 0) { // no need to mark if passing by integer registers or stack -+ pass_stack(value); ++ if (super_klass != x10 || UseCompressedOops) { ++ if (!IS_A_TEMP(x10)) { ++ pushed_registers += x10; + } + } + -+ public: -+ SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) -+ : NativeSignatureIterator(method) -+ { -+ _from = from; -+ _to = to; ++ push_reg(pushed_registers, sp); + -+ _int_args = to - (method->is_static() ? 16 : 17); -+ _fp_args = to - 8; -+ _fp_identifiers = to - 9; -+ *(int*) _fp_identifiers = 0; -+ _num_reg_int_args = (method->is_static() ? 1 : 0); -+ _num_reg_fp_args = 0; -+ } ++ // Get super_klass value into x10 (even if it was in x15 or x12) ++ mv(x10, super_klass); + -+ ~SlowSignatureHandler() -+ { -+ _from = NULL; -+ _to = NULL; -+ _int_args = NULL; -+ _fp_args = NULL; -+ _fp_identifiers = NULL; -+ } -+}; ++#ifndef PRODUCT ++ mv(t1, (address)&SharedRuntime::_partial_subtype_ctr); ++ Address pst_counter_addr(t1); ++ ld(t0, pst_counter_addr); ++ add(t0, t0, 1); ++ sd(t0, pst_counter_addr); ++#endif // PRODUCT + ++ // We will consult the secondary-super array. ++ ld(x15, secondary_supers_addr); ++ // Load the array length. ++ lwu(x12, Address(x15, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ add(x15, x15, Array::base_offset_in_bytes()); + -+JRT_ENTRY(address, -+ InterpreterRuntime::slow_signature_handler(JavaThread* current, -+ Method* method, -+ intptr_t* from, -+ intptr_t* to)) -+ methodHandle m(current, (Method*)method); -+ assert(m->is_native(), "sanity check"); ++ // Set t0 to an obvious invalid value, falling through by default ++ li(t0, -1); ++ // Scan X12 words at [X15] for an occurrence of X10. 
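++  // (Illustrative note: x15 holds the start of the secondary_supers array data,
++  // x12 its length, and x10 the super_klass being sought; after the scan t0
++  // equals x10 on a hit, so the bne against the saved copy below detects a miss.)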
++  repne_scan(x15, x10, x12, t0);
+
++  // pop will restore x10, so we should use a temp register to keep its value
++  mv(t1, x10);
+
++  // Unspill the temp registers:
++  pop_reg(pushed_registers, sp);
+
++  bne(t1, t0, *L_failure);
+
++  // Success. Cache the super we found and proceed in triumph.
++  sd(super_klass, super_cache_addr);
+
++  if (L_success != &L_fallthrough) {
++    j(*L_success);
++  }
+
++#undef IS_A_TEMP
+
++  bind(L_fallthrough);
++}
+
++// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
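++// (Illustrative note: both tlab_allocate and eden_allocate below simply delegate
++// to the active BarrierSetAssembler, which emits the GC-specific fast-path
++// allocation and branches to slow_case when the fast path cannot allocate.)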
++void MacroAssembler::tlab_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Register tmp2, ++ Label& slow_case, ++ bool is_far) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); ++} + -+ public: -+ // Creation -+ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); -+ virtual ~SignatureHandlerGenerator() { -+ _masm = NULL; -+ } ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp, ++ Label& slow_case, ++ bool is_far) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp, slow_case, is_far); ++} + -+ // Code generation -+ void generate(uint64_t fingerprint); + -+ // Code generation support -+ static Register from(); -+ static Register to(); -+ static Register temp(); -+}; ++// get_thread() can be called anywhere inside generated code so we ++// need to save whatever non-callee save context might get clobbered ++// by the call to Thread::current() or, indeed, the call setup code. ++void MacroAssembler::get_thread(Register thread) { ++ // save all call-clobbered regs except thread ++ RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + ++ RegSet::range(x28, x31) + ra - thread; ++ push_reg(saved_regs, sp); + -+#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -new file mode 100644 -index 00000000000..9a6084afa1d ---- /dev/null -+++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -@@ -0,0 +1,86 @@ -+/* -+ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ int32_t offset = 0; ++ movptr_with_offset(ra, CAST_FROM_FN_PTR(address, Thread::current), offset); ++ jalr(ra, ra, offset); ++ if (thread != x10) { ++ mv(thread, x10); ++ } + -+#ifndef CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP -+#define CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP ++ // restore pushed registers ++ pop_reg(saved_regs, sp); ++} + -+private: ++void MacroAssembler::load_byte_map_base(Register reg) { ++ jbyte *byte_map_base = ++ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); ++ li(reg, (uint64_t)byte_map_base); ++} + -+ // FP value associated with _last_Java_sp: -+ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) { ++ relocInfo::relocType rtype = dest.rspec().reloc()->type(); ++ unsigned long low_address = (uintptr_t)CodeCache::low_bound(); ++ unsigned long high_address = (uintptr_t)CodeCache::high_bound(); ++ unsigned long dest_address = (uintptr_t)dest.target(); ++ long offset_low = dest_address - low_address; ++ long offset_high = dest_address - high_address; + -+public: -+ // Each arch must define reset, save, restore -+ // These are used by objects that only care about: -+ // 1 - initializing a new state (thread creation, javaCalls) -+ // 2 - saving a current state (javaCalls) -+ // 3 - restoring an old state (javaCalls) ++ assert(is_valid_riscv64_address(dest.target()), "bad address"); ++ assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); + -+ void clear(void) { -+ // clearing _last_Java_sp must be first -+ _last_Java_sp = NULL; -+ OrderAccess::release(); -+ _last_Java_fp = NULL; -+ _last_Java_pc = NULL; ++ InstructionMark im(this); ++ code_section()->relocate(inst_mark(), dest.rspec()); ++ // RISC-V doesn't compute a page-aligned address, in order to partially ++ // compensate for the use of *signed* offsets in its base+disp12 ++ // addressing mode (RISC-V's PC-relative reach remains asymmetric ++ // [-(2G + 2K), 2G - 2k). 
++ if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { ++ int64_t distance = dest.target() - pc(); ++ auipc(reg1, (int32_t)distance + 0x800); ++ offset = ((int32_t)distance << 20) >> 20; ++ } else { ++ movptr_with_offset(reg1, dest.target(), offset); + } ++} + -+ void copy(JavaFrameAnchor* src) { -+ // In order to make sure the transition state is valid for "this" -+ // We must clear _last_Java_sp before copying the rest of the new data -+ // -+ // Hack Alert: Temporary bugfix for 4717480/4721647 -+ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp -+ // unless the value is changing -+ // -+ assert(src != NULL, "Src should not be NULL."); -+ if (_last_Java_sp != src->_last_Java_sp) { -+ _last_Java_sp = NULL; -+ OrderAccess::release(); -+ } -+ _last_Java_fp = src->_last_Java_fp; -+ _last_Java_pc = src->_last_Java_pc; -+ // Must be last so profiler will always see valid frame if has_last_frame() is true -+ _last_Java_sp = src->_last_Java_sp; -+ } ++void MacroAssembler::build_frame(int framesize) { ++ assert(framesize >= 2, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ++ sub(sp, sp, framesize); ++ sd(fp, Address(sp, framesize - 2 * wordSize)); ++ sd(ra, Address(sp, framesize - wordSize)); ++ if (PreserveFramePointer) { add(fp, sp, framesize); } ++} + -+ bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; } -+ void make_walkable(JavaThread* thread); -+ void capture_last_Java_pc(void); ++void MacroAssembler::remove_frame(int framesize) { ++ assert(framesize >= 2, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ++ ld(fp, Address(sp, framesize - 2 * wordSize)); ++ ld(ra, Address(sp, framesize - wordSize)); ++ add(sp, sp, framesize); ++} + -+ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++void MacroAssembler::reserved_stack_check() { ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; + -+ const address last_Java_pc(void) { return _last_Java_pc; } ++ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); ++ bltu(sp, t0, no_reserved_zone_enabling); + -+private: ++ enter(); // RA and FP are live. ++ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset); ++ jalr(x1, t0, offset); ++ leave(); + -+ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. 
++ offset = 0;
++ la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset);
++ jalr(x0, t0, offset);
++ should_not_reach_here();
+
-+public:
++ bind(no_reserved_zone_enabling);
++}
+
-+ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); }
++void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) {
++ Label retry_load;
++ bind(retry_load);
++ // flush and load exclusive from the memory location
++ lr_w(tmp, counter_addr);
++ addw(tmp, tmp, 1);
++ // if we store+flush with no intervening write tmp will be zero
++ sc_w(tmp, tmp, counter_addr);
++ bnez(tmp, retry_load);
++}
+
-+ intptr_t* last_Java_fp(void) { return _last_Java_fp; }
++void MacroAssembler::load_prototype_header(Register dst, Register src) {
++ load_klass(dst, src);
++ ld(dst, Address(dst, Klass::prototype_header_offset()));
++}
+
-+#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP
-diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp
-new file mode 100644
-index 00000000000..814ed23e471
---- /dev/null
-+++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp
-@@ -0,0 +1,214 @@
-+/*
-+ * Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved.
-+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
-+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This code is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License version 2 only, as
-+ * published by the Free Software Foundation.
-+ *
-+ * This code is distributed in the hope that it will be useful, but WITHOUT
-+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+ * version 2 for more details (a copy is included in the LICENSE file that
-+ * accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License version
-+ * 2 along with this work; if not, write to the Free Software Foundation,
-+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-+ *
-+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-+ * or visit www.oracle.com if you need additional information or have any
-+ * questions.
-+ *
-+ */
-+
-+#include "precompiled.hpp"
-+#include "asm/macroAssembler.hpp"
-+#include "gc/shared/barrierSet.hpp"
-+#include "gc/shared/barrierSetAssembler.hpp"
-+#include "memory/resourceArea.hpp"
-+#include "prims/jniFastGetField.hpp"
-+#include "prims/jvm_misc.hpp"
-+#include "prims/jvmtiExport.hpp"
-+#include "runtime/safepoint.hpp"
-+
-+#define __ masm->
-+
-+#define BUFFER_SIZE 30*wordSize
-+
-+// Instead of issuing a LoadLoad barrier we create an address
-+// dependency between loads; this might be more efficient.
++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters, ++ Register flag) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ assert_different_registers(lock_reg, obj_reg, swap_reg); + -+// Common register usage: -+// x10/f10: result -+// c_rarg0: jni env -+// c_rarg1: obj -+// c_rarg2: jfield id ++ if (PrintBiasedLockingStatistics && counters == NULL) ++ counters = BiasedLocking::counters(); + -+static const Register robj = x13; -+static const Register rcounter = x14; -+static const Register roffset = x15; -+static const Register rcounter_addr = x16; -+static const Register result = x17; ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + -+address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { -+ const char *name; -+ switch (type) { -+ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; -+ case T_BYTE: name = "jni_fast_GetByteField"; break; -+ case T_CHAR: name = "jni_fast_GetCharField"; break; -+ case T_SHORT: name = "jni_fast_GetShortField"; break; -+ case T_INT: name = "jni_fast_GetIntField"; break; -+ case T_LONG: name = "jni_fast_GetLongField"; break; -+ case T_FLOAT: name = "jni_fast_GetFloatField"; break; -+ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; -+ default: ShouldNotReachHere(); -+ name = NULL; // unreachable ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld(swap_reg, mark_addr); + } -+ ResourceMark rm; -+ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); -+ CodeBuffer cbuf(blob); -+ MacroAssembler* masm = new MacroAssembler(&cbuf); -+ address fast_entry = __ pc(); -+ -+ Label slow; -+ int32_t offset = 0; -+ __ la_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset); -+ __ addi(rcounter_addr, rcounter_addr, offset); -+ -+ Address safepoint_counter_addr(rcounter_addr, 0); -+ __ lwu(rcounter, safepoint_counter_addr); -+ // An even value means there are no ongoing safepoint operations -+ __ andi(t0, rcounter, 1); -+ __ bnez(t0, slow); -+ -+ if (JvmtiExport::can_post_field_access()) { -+ // Using barrier to order wrt. JVMTI check and load of result. -+ __ membar(MacroAssembler::LoadLoad); -+ -+ // Check to see if a field access watch has been set before we -+ // take the fast path. -+ int32_t offset2; -+ __ la_patchable(result, -+ ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), -+ offset2); -+ __ lwu(result, Address(result, offset2)); -+ __ bnez(result, slow); -+ -+ __ mv(robj, c_rarg1); ++ andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); ++ li(t0, markOopDesc::biased_lock_pattern); ++ bne(t0, tmp_reg, cas_label); ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. 
++ load_prototype_header(tmp_reg, obj_reg); ++ orr(tmp_reg, tmp_reg, xthread); ++ xorr(tmp_reg, swap_reg, tmp_reg); ++ andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); ++ if (flag->is_valid()) { ++ mv(flag, tmp_reg); ++ } ++ if (counters != NULL) { ++ Label around; ++ bnez(tmp_reg, around); ++ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); ++ j(done); ++ bind(around); + } else { -+ // Using address dependency to order wrt. load of result. -+ __ xorr(robj, c_rarg1, rcounter); -+ __ xorr(robj, robj, rcounter); // obj, since -+ // robj ^ rcounter ^ rcounter == robj -+ // robj is address dependent on rcounter. ++ beqz(tmp_reg, done); + } + -+ // Both robj and t0 are clobbered by try_resolve_jobject_in_native. -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ assert_cond(bs != NULL); -+ bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow); ++ Label try_revoke_bias; ++ Label try_rebias; + -+ __ srli(roffset, c_rarg2, 2); // offset ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. + -+ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); -+ speculative_load_pclist[count] = __ pc(); // Used by the segfault handler -+ __ add(roffset, robj, roffset); ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ bnez(t0, try_revoke_bias); + -+ switch (type) { -+ case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break; -+ case T_BYTE: __ lb(result, Address(roffset, 0)); break; -+ case T_CHAR: __ lhu(result, Address(roffset, 0)); break; -+ case T_SHORT: __ lh(result, Address(roffset, 0)); break; -+ case T_INT: __ lw(result, Address(roffset, 0)); break; -+ case T_LONG: __ ld(result, Address(roffset, 0)); break; -+ case T_FLOAT: { -+ __ flw(f28, Address(roffset, 0)); // f28 as temporaries -+ __ fmv_x_w(result, f28); // f{31--0}-->x -+ break; ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); ++ bnez(t0, try_rebias); ++ ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. 
++ { ++ Label cas_success; ++ Label counter; ++ mv(t0, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, t0); ++ orr(tmp_reg, swap_reg, xthread); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); ++ // cas failed here if slow_cass == NULL ++ if (flag->is_valid()) { ++ mv(flag, 1); ++ j(counter); + } -+ case T_DOUBLE: { -+ __ fld(f28, Address(roffset, 0)); // f28 as temporaries -+ __ fmv_x_d(result, f28); // d{63--0}-->x -+ break; ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. ++ bind(cas_success); ++ if (flag->is_valid()) { ++ mv(flag, 0); ++ bind(counter); ++ } ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), ++ tmp_reg, t0); + } -+ default: ShouldNotReachHere(); + } ++ j(done); + -+ // Using acquire: Order JVMTI check and load of result wrt. succeeding check -+ // (LoadStore for volatile field). -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ -+ __ lw(t0, safepoint_counter_addr); -+ __ bne(rcounter, t0, slow); -+ -+ switch (type) { -+ case T_FLOAT: __ fmv_w_x(f10, result); break; -+ case T_DOUBLE: __ fmv_d_x(f10, result); break; -+ default: __ mv(x10, result); break; -+ } -+ __ ret(); ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ { ++ Label cas_success; ++ Label counter; ++ load_prototype_header(tmp_reg, obj_reg); ++ orr(tmp_reg, xthread, tmp_reg); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); ++ // cas failed here if slow_cass == NULL ++ if (flag->is_valid()) { ++ mv(flag, 1); ++ j(counter); ++ } + -+ slowcase_entry_pclist[count++] = __ pc(); -+ __ bind(slow); -+ address slow_case_addr; -+ switch (type) { -+ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; -+ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; -+ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; -+ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; -+ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; -+ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; -+ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; -+ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; -+ default: ShouldNotReachHere(); -+ slow_case_addr = NULL; // unreachable ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. 
++ bind(cas_success); ++ if (flag->is_valid()) { ++ mv(flag, 0); ++ bind(counter); ++ } ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), ++ tmp_reg, t0); ++ } + } ++ j(done); + ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. + { -+ __ enter(); -+ int32_t tmp_offset = 0; -+ __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset); -+ __ jalr(x1, t0, tmp_offset); -+ __ leave(); -+ __ ret(); -+ } -+ __ flush(); -+ -+ return fast_entry; -+} ++ Label cas_success, nope; ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); ++ bind(cas_success); + ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, ++ t0); ++ } ++ bind(nope); ++ } + -+address JNI_FastGetField::generate_fast_get_boolean_field() { -+ return generate_fast_get_int_field0(T_BOOLEAN); -+} ++ bind(cas_label); + -+address JNI_FastGetField::generate_fast_get_byte_field() { -+ return generate_fast_get_int_field0(T_BYTE); ++ return null_check_offset; +} + -+address JNI_FastGetField::generate_fast_get_char_field() { -+ return generate_fast_get_int_field0(T_CHAR); -+} ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { ++ assert(UseBiasedLocking, "why call this otherwise?"); + -+address JNI_FastGetField::generate_fast_get_short_field() { -+ return generate_fast_get_int_field0(T_SHORT); ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); ++ if (flag->is_valid()) { mv(flag, tmp_reg); } ++ beqz(tmp_reg, done); +} + -+address JNI_FastGetField::generate_fast_get_int_field() { -+ return generate_fast_get_int_field0(T_INT); ++// Move the address of the polling page into dest. 
++void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(dest, Address(xthread, Thread::polling_page_offset())); ++ } else { ++ uint64_t align = (uint64_t)page & 0xfff; ++ assert(align == 0, "polling page must be page aligned"); ++ la_patchable(dest, Address(page, rtype), offset); ++ } +} + -+address JNI_FastGetField::generate_fast_get_long_field() { -+ return generate_fast_get_int_field0(T_LONG); ++// Read the polling page. The address of the polling page must ++// already be in r. ++void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { ++ int32_t offset = 0; ++ get_polling_page(dest, page, offset, rtype); ++ read_polling_page(dest, offset, rtype); +} + -+address JNI_FastGetField::generate_fast_get_float_field() { -+ return generate_fast_get_int_field0(T_FLOAT); ++// Read the polling page. The address of the polling page must ++// already be in r. ++void MacroAssembler::read_polling_page(Register dest, int32_t offset, relocInfo::relocType rtype) { ++ code_section()->relocate(pc(), rtype); ++ lwu(zr, Address(dest, offset)); +} + -+address JNI_FastGetField::generate_fast_get_double_field() { -+ return generate_fast_get_int_field0(T_DOUBLE); ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++#ifdef ASSERT ++ { ++ ThreadInVMfromUnknown tiv; ++ assert (UseCompressedOops, "should only be used for compressed oops"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); ++ } ++#endif ++ int oop_index = oop_recorder()->find_index(obj); ++ InstructionMark im(this); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ code_section()->relocate(inst_mark(), rspec); ++ li32(dst, 0xDEADBEEF); ++ zero_extend(dst, dst, 32); +} -diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -new file mode 100644 -index 00000000000..83ffcc55d83 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -@@ -0,0 +1,106 @@ -+/* -+ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#ifndef CPU_RISCV_JNITYPES_RISCV_HPP -+#define CPU_RISCV_JNITYPES_RISCV_HPP -+ -+#include "jni.h" -+#include "memory/allStatic.hpp" -+#include "oops/oop.hpp" + -+// This file holds platform-dependent routines used to write primitive jni -+// types to the array of arguments passed into JavaCalls::call ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ int index = oop_recorder()->find_index(k); ++ assert(!Universe::heap()->is_in_reserved(k), "should not be an oop"); + -+class JNITypes : private AllStatic { -+ // These functions write a java primitive type (in native format) -+ // to a java stack slot array to be passed as an argument to JavaCalls:calls. -+ // I.e., they are functionally 'push' operations if they have a 'pos' -+ // formal parameter. Note that jlong's and jdouble's are written -+ // _in reverse_ of the order in which they appear in the interpreter -+ // stack. This is because call stubs (see stubGenerator_sparc.cpp) -+ // reverse the argument list constructed by JavaCallArguments (see -+ // javaCalls.hpp). ++ InstructionMark im(this); ++ RelocationHolder rspec = metadata_Relocation::spec(index); ++ code_section()->relocate(inst_mark(), rspec); ++ narrowKlass nk = Klass::encode_klass(k); ++ li32(dst, nk); ++ zero_extend(dst, dst, 32); ++} + -+public: -+ // Ints are stored in native format in one JavaCallArgument slot at *to. -+ static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } -+ static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } -+ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type || ++ entry.rspec().type() == relocInfo::opt_virtual_call_type || ++ entry.rspec().type() == relocInfo::static_call_type || ++ entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); + -+ // Longs are stored in native format in one JavaCallArgument slot at -+ // *(to+1). -+ static inline void put_long(jlong from, intptr_t *to) { -+ *(jlong*) (to + 1) = from; ++ // We need a trampoline if branches are far. ++ if (far_branches()) { ++ bool in_scratch_emit_size = false; ++#ifdef COMPILER2 ++ // We don't want to emit a trampoline if C2 is generating dummy ++ // code during its branch shortening phase. 
++ CompileTask* task = ciEnv::current()->task();
++ in_scratch_emit_size =
++ (task != NULL && is_c2_compile(task->comp_level()) &&
++ Compile::current()->in_scratch_emit_size());
++#endif
++ if (!in_scratch_emit_size) {
++ address stub = emit_trampoline_stub(offset(), entry.target());
++ if (stub == NULL) {
++ postcond(pc() == badAddress);
++ return NULL; // CodeCache is full
++ }
++ }
++ }
+
-+ static inline void put_long(jlong from, intptr_t *to, int& pos) {
-+ *(jlong*) (to + 1 + pos) = from;
-+ pos += 2;
++ if (cbuf != NULL) { cbuf->set_insts_mark(); }
++ relocate(entry.rspec());
++ if (!far_branches()) {
++ jal(entry.target());
++ } else {
++ jal(pc());
+ }
++ // just need to return a non-null address
++ postcond(pc() != badAddress);
++ return pc();
++}
+
-+ static inline void put_long(jlong *from, intptr_t *to, int& pos) {
-+ *(jlong*) (to + 1 + pos) = *from;
-+ pos += 2;
++address MacroAssembler::ic_call(address entry, jint method_index) {
++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
++ movptr(t1, (address)Universe::non_oop_word());
++ assert_cond(entry != NULL);
++ return trampoline_call(Address(entry, rh));
++}
++
++// Emit a trampoline stub for a call to a target which is too far away.
++//
++// code sequences:
++//
++// call-site:
++// branch-and-link to <destination> or <trampoline stub>
++//
++// Related trampoline stub for this call site in the stub section:
++// load the call target from the constant pool
++// branch (RA still points to the call site above)
++
++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
++ address dest) {
++ address stub = start_a_stub(NativeInstruction::instruction_size
++ + NativeCallTrampolineStub::instruction_size);
++ if (stub == NULL) {
++ return NULL; // CodeBuffer::expand failed
+ }
+
-+ // Oops are stored in native format in one JavaCallArgument slot at *to.
-+ static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); }
-+ static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; }
++ // Create a trampoline stub relocation which relates this trampoline stub
++ // with the call instruction at insts_call_instruction_offset in the
++ // instructions code-section.
+
-+ // Floats are stored in native format in one JavaCallArgument slot at *to.
-+ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; }
-+ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; }
-+ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; }
-+
-+#undef _JNI_SLOT_OFFSET
-+#define _JNI_SLOT_OFFSET 1
-+ // Doubles are stored in native word format in one JavaCallArgument
-+ // slot at *(to+1).
++ // make sure 4 byte aligned here, so that the destination address would be
++ // 8 byte aligned after 3 instructions
++ // when we reach here we may get a 2-byte alignment so need to align it
++ align(wordSize, NativeCallTrampolineStub::data_offset);
+
++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() +
++ insts_call_instruction_offset));
++ const int stub_start_offset = offset();
+
++ // Now, create the trampoline stub's code:
++ // - load the call
++ // - call
++ Label target;
++ ld(t0, target); // auipc + ld
++ jr(t0); // jalr
++ bind(target);
++ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
++ "should be");
++ assert(offset() % wordSize == 0, "bad alignment");
++ emit_int64((intptr_t)dest);
+
-+ static inline void put_double(jdouble from, intptr_t *to) { -+ *(jdouble*) (to + 1) = from; -+ } -+ -+ static inline void put_double(jdouble from, intptr_t *to, int& pos) { -+ *(jdouble*) (to + 1 + pos) = from; -+ pos += 2; -+ } ++ // make sure 4 byte aligned here, so that the destination address would be ++ // 8 byte aligned after 3 intructions ++ // when we reach here we may get a 2-byte alignment so need to align it ++ align(wordSize, NativeCallTrampolineStub::data_offset); + -+ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { -+ *(jdouble*) (to + 1 + pos) = *from; -+ pos += 2; -+ } ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + ++ insts_call_instruction_offset)); ++ const int stub_start_offset = offset(); + -+ // The get_xxx routines, on the other hand, actually _do_ fetch -+ // java primitive types from the interpreter stack. -+ // No need to worry about alignment on Intel. -+ static inline jint get_int (intptr_t *from) { return *(jint *) from; } -+ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } -+ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } -+ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } -+ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } -+#undef _JNI_SLOT_OFFSET -+}; ++ // Now, create the trampoline stub's code: ++ // - load the call ++ // - call ++ Label target; ++ ld(t0, target); // auipc + ld ++ jr(t0); // jalr ++ bind(target); ++ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, ++ "should be"); ++ assert(offset() % wordSize == 0, "bad alignment"); ++ emit_int64((intptr_t)dest); + -+#endif // CPU_RISCV_JNITYPES_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -new file mode 100644 -index 00000000000..86710295444 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -0,0 +1,4016 @@ -+/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ const address stub_start_addr = addr_at(stub_start_offset); + -+#include "precompiled.hpp" -+#include "asm/assembler.hpp" -+#include "asm/assembler.inline.hpp" -+#include "compiler/disassembler.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "gc/shared/cardTable.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" -+#include "interpreter/bytecodeHistogram.hpp" -+#include "interpreter/interpreter.hpp" -+#include "memory/resourceArea.hpp" -+#include "memory/universe.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/accessDecorators.hpp" -+#include "oops/compressedOops.inline.hpp" -+#include "oops/klass.inline.hpp" -+#include "oops/oop.hpp" -+#include "runtime/interfaceSupport.inline.hpp" -+#include "runtime/jniHandles.inline.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.hpp" -+#include "utilities/powerOfTwo.hpp" -+#ifdef COMPILER2 -+#include "opto/compile.hpp" -+#include "opto/node.hpp" -+#include "opto/output.hpp" -+#endif ++ assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); + -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#else -+#define BLOCK_COMMENT(str) block_comment(str) -+#endif -+#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") ++ end_a_stub(); ++ return stub_start_addr; ++} + -+static void pass_arg0(MacroAssembler* masm, Register arg) { -+ if (c_rarg0 != arg) { -+ assert_cond(masm != NULL); -+ masm->mv(c_rarg0, arg); ++Address MacroAssembler::add_memory_helper(const Address dst) { ++ switch (dst.getMode()) { ++ case Address::base_plus_offset: ++ // This is the expected mode, although we allow all the other ++ // forms below. ++ return form_address(t1, dst.base(), dst.offset()); ++ default: ++ la(t1, dst); ++ return Address(t1); + } +} + -+static void pass_arg1(MacroAssembler* masm, Register arg) { -+ if (c_rarg1 != arg) { -+ assert_cond(masm != NULL); -+ masm->mv(c_rarg1, arg); -+ } ++void MacroAssembler::add_memory_int64(const Address dst, int64_t imm) { ++ Address adr = add_memory_helper(dst); ++ assert_different_registers(adr.base(), t0); ++ ld(t0, adr); ++ addi(t0, t0, imm); ++ sd(t0, adr); +} + -+static void pass_arg2(MacroAssembler* masm, Register arg) { -+ if (c_rarg2 != arg) { -+ assert_cond(masm != NULL); -+ masm->mv(c_rarg2, arg); -+ } ++void MacroAssembler::add_memory_int32(const Address dst, int32_t imm) { ++ Address adr = add_memory_helper(dst); ++ assert_different_registers(adr.base(), t0); ++ lwu(t0, adr); ++ addiw(t0, t0, imm); ++ sw(t0, adr); +} + -+static void pass_arg3(MacroAssembler* masm, Register arg) { -+ if (c_rarg3 != arg) { -+ assert_cond(masm != NULL); -+ masm->mv(c_rarg3, arg); -+ } ++void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { ++ assert_different_registers(src1, t0); ++ int32_t offset; ++ la_patchable(t0, src2, offset); ++ ld(t0, Address(t0, offset)); ++ beq(src1, t0, equal); +} + -+void MacroAssembler::align(int modulus, int extra_offset) { -+ CompressibleRegion cr(this); -+ while ((offset() + extra_offset) % modulus != 0) { nop(); } ++// string indexof ++// compute index by trailing zeros ++void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, ++ Register match_mask, Register result, ++ Register ch2, Register tmp, ++ bool haystack_isL) ++{ ++ int haystack_chr_shift = haystack_isL ? 
0 : 1; ++ srl(match_mask, match_mask, trailing_zeros); ++ srli(match_mask, match_mask, 1); ++ srli(tmp, trailing_zeros, LogBitsPerByte); ++ if (!haystack_isL) andi(tmp, tmp, 0xE); ++ add(haystack, haystack, tmp); ++ ld(ch2, Address(haystack)); ++ if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); ++ add(result, result, tmp); +} + -+void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { -+ call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); ++// string indexof ++// Find pattern element in src, compute match mask, ++// only the first occurrence of 0x80/0x8000 at low bits is the valid match index ++// match mask patterns and corresponding indices would be like: ++// - 0x8080808080808080 (Latin1) ++// - 7 6 5 4 3 2 1 0 (match index) ++// - 0x8000800080008000 (UTF16) ++// - 3 2 1 0 (match index) ++void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, ++ Register mask1, Register mask2) ++{ ++ xorr(src, pattern, src); ++ sub(match_mask, src, mask1); ++ orr(src, src, mask2); ++ notr(src, src); ++ andr(match_mask, match_mask, src); +} + -+// Implementation of call_VM versions ++#ifdef COMPILER2 ++// Code for BigInteger::mulAdd instrinsic ++// out = x10 ++// in = x11 ++// offset = x12 (already out.length-offset) ++// len = x13 ++// k = x14 ++// tmp = x28 ++// ++// pseudo code from java implementation: ++// long kLong = k & LONG_MASK; ++// carry = 0; ++// offset = out.length-offset - 1; ++// for (int j = len - 1; j >= 0; j--) { ++// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; ++// out[offset--] = (int)product; ++// carry = product >>> 32; ++// } ++// return (int)carry; ++void MacroAssembler::mul_add(Register out, Register in, Register offset, ++ Register len, Register k, Register tmp) { ++ Label L_tail_loop, L_unroll, L_end; ++ mv(tmp, out); ++ mv(out, zr); ++ blez(len, L_end); ++ zero_extend(k, k, 32); ++ slliw(t0, offset, LogBytesPerInt); ++ add(offset, tmp, t0); ++ slliw(t0, len, LogBytesPerInt); ++ add(in, in, t0); + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ bool check_exceptions) { -+ call_VM_helper(oop_result, entry_point, 0, check_exceptions); -+} ++ const int unroll = 8; ++ li(tmp, unroll); ++ blt(len, tmp, L_tail_loop); ++ bind(L_unroll); ++ for (int i = 0; i < unroll; i++) { ++ sub(in, in, BytesPerInt); ++ lwu(t0, Address(in, 0)); ++ mul(t1, t0, k); ++ add(t0, t1, out); ++ sub(offset, offset, BytesPerInt); ++ lwu(t1, Address(offset, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(offset, 0)); ++ srli(out, t0, 32); ++ } ++ subw(len, len, tmp); ++ bge(len, tmp, L_unroll); + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions) { -+ pass_arg1(this, arg_1); -+ call_VM_helper(oop_result, entry_point, 1, check_exceptions); -+} ++ bind(L_tail_loop); ++ blez(len, L_end); ++ sub(in, in, BytesPerInt); ++ lwu(t0, Address(in, 0)); ++ mul(t1, t0, k); ++ add(t0, t1, out); ++ sub(offset, offset, BytesPerInt); ++ lwu(t1, Address(offset, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(offset, 0)); ++ srli(out, t0, 32); ++ subw(len, len, 1); ++ j(L_tail_loop); + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ bool check_exceptions) { -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ pass_arg1(this, arg_1); -+ call_VM_helper(oop_result, 
entry_point, 2, check_exceptions); ++ bind(L_end); +} + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ Register arg_3, -+ bool check_exceptions) { -+ assert(arg_1 != c_rarg3, "smashed arg"); -+ assert(arg_2 != c_rarg3, "smashed arg"); -+ pass_arg3(this, arg_3); -+ -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); ++// add two unsigned input and output carry ++void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, carry); ++ assert_different_registers(dst, src2); ++ add(dst, src1, src2); ++ sltu(carry, dst, src2); ++} + -+ pass_arg1(this, arg_1); -+ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++// add two input with carry ++void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, carry); ++ add(dst, src1, src2); ++ add(dst, dst, carry); +} + -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments, -+ bool check_exceptions) { -+ call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++// add two unsigned input with carry and output carry ++void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, src2); ++ adc(dst, src1, src2, carry); ++ sltu(carry, dst, src2); +} + -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions) { -+ pass_arg1(this, arg_1); -+ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, ++ Register src1, Register src2, Register carry) ++{ ++ cad(dest_lo, dest_lo, src1, carry); ++ add(dest_hi, dest_hi, carry); ++ cad(dest_lo, dest_lo, src2, carry); ++ add(final_dest_hi, dest_hi, carry); +} + -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ bool check_exceptions) { ++/** ++ * Multiply 32 bit by 32 bit first loop. 
++ */ ++void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx) ++{ ++ // jlong carry, x[], y[], z[]; ++ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { ++ // long product = y[idx] * x[xstart] + carry; ++ // z[kdx] = (int)product; ++ // carry = product >>> 32; ++ // } ++ // z[xstart] = (int)carry; + -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ pass_arg1(this, arg_1); -+ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); -+} ++ Label L_first_loop, L_first_loop_exit; ++ blez(idx, L_first_loop_exit); + -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ Register arg_3, -+ bool check_exceptions) { -+ assert(arg_1 != c_rarg3, "smashed arg"); -+ assert(arg_2 != c_rarg3, "smashed arg"); -+ pass_arg3(this, arg_3); -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ pass_arg1(this, arg_1); -+ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); -+} ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ lwu(x_xstart, Address(t0, 0)); + -+// these are no-ops overridden by InterpreterMacroAssembler -+void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} -+void MacroAssembler::check_and_handle_popframe(Register java_thread) {} ++ bind(L_first_loop); ++ subw(idx, idx, 1); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ lwu(y_idx, Address(t0, 0)); ++ mul(product, x_xstart, y_idx); ++ add(product, product, carry); ++ srli(carry, product, 32); ++ subw(kdx, kdx, 1); ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(product, Address(t0, 0)); ++ bgtz(idx, L_first_loop); + -+// Calls to C land -+// -+// When entering C land, the fp, & esp of the last Java frame have to be recorded -+// in the (thread-local) JavaThread object. When leaving C land, the last Java fp -+// has to be reset to 0. This is required to allow proper stack traversal. -+void MacroAssembler::set_last_Java_frame(Register last_java_sp, -+ Register last_java_fp, -+ Register last_java_pc, -+ Register tmp) { ++ bind(L_first_loop_exit); ++} + -+ if (last_java_pc->is_valid()) { -+ sd(last_java_pc, Address(xthread, -+ JavaThread::frame_anchor_offset() + -+ JavaFrameAnchor::last_Java_pc_offset())); -+ } ++/** ++ * Multiply 64 bit by 64 bit first loop. 
++ */ ++void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx) ++{ ++ // ++ // jlong carry, x[], y[], z[]; ++ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { ++ // huge_128 product = y[idx] * x[xstart] + carry; ++ // z[kdx] = (jlong)product; ++ // carry = (jlong)(product >>> 64); ++ // } ++ // z[xstart] = carry; ++ // + -+ // determine last_java_sp register -+ if (last_java_sp == sp) { -+ mv(tmp, sp); -+ last_java_sp = tmp; -+ } else if (!last_java_sp->is_valid()) { -+ last_java_sp = esp; -+ } ++ Label L_first_loop, L_first_loop_exit; ++ Label L_one_x, L_one_y, L_multiply; + -+ sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); ++ subw(xstart, xstart, 1); ++ bltz(xstart, L_one_x); + -+ // last_java_fp is optional -+ if (last_java_fp->is_valid()) { -+ sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); -+ } -+} ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ ld(x_xstart, Address(t0, 0)); ++ ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian + -+void MacroAssembler::set_last_Java_frame(Register last_java_sp, -+ Register last_java_fp, -+ address last_java_pc, -+ Register tmp) { -+ assert(last_java_pc != NULL, "must provide a valid PC"); ++ bind(L_first_loop); ++ subw(idx, idx, 1); ++ bltz(idx, L_first_loop_exit); ++ subw(idx, idx, 1); ++ bltz(idx, L_one_y); + -+ la(tmp, last_java_pc); -+ sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(y_idx, Address(t0, 0)); ++ ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian ++ bind(L_multiply); + -+ set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); -+} ++ mulhu(t0, x_xstart, y_idx); ++ mul(product, x_xstart, y_idx); ++ cad(product, product, carry, t1); ++ adc(carry, t0, zr, t1); + -+void MacroAssembler::set_last_Java_frame(Register last_java_sp, -+ Register last_java_fp, -+ Label &L, -+ Register tmp) { -+ if (L.is_bound()) { -+ set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); -+ } else { -+ InstructionMark im(this); -+ L.add_patch_at(code(), locator()); -+ set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); -+ } -+} ++ subw(kdx, kdx, 2); ++ ror_imm(product, product, 32); // back to big-endian ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sd(product, Address(t0, 0)); + -+void MacroAssembler::reset_last_Java_frame(bool clear_fp) { -+ // we must set sp to zero to clear frame -+ sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); ++ j(L_first_loop); + -+ // must clear fp, so that compiled frames are not confused; it is -+ // possible that we need it only for debugging -+ if (clear_fp) { -+ sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); -+ } ++ bind(L_one_y); ++ lwu(y_idx, Address(y, 0)); ++ j(L_multiply); + -+ // Always clear the pc because it could have been set by make_walkable() -+ sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); ++ bind(L_one_x); ++ lwu(x_xstart, Address(x, 0)); ++ j(L_first_loop); ++ ++ bind(L_first_loop_exit); +} + -+void MacroAssembler::call_VM_base(Register oop_result, -+ Register java_thread, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments, -+ bool check_exceptions) { -+ // determine java_thread register -+ if (!java_thread->is_valid()) { -+ java_thread = xthread; -+ } -+ // 
determine last_java_sp register -+ if (!last_java_sp->is_valid()) { -+ last_java_sp = esp; -+ } ++/** ++ * Multiply 128 bit by 128 bit. Unrolled inner loop. ++ * ++ */ ++void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, ++ Register carry, Register carry2, ++ Register idx, Register jdx, ++ Register yz_idx1, Register yz_idx2, ++ Register tmp, Register tmp3, Register tmp4, ++ Register tmp6, Register product_hi) ++{ ++ // jlong carry, x[], y[], z[]; ++ // int kdx = xstart+1; ++ // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop ++ // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; ++ // jlong carry2 = (jlong)(tmp3 >>> 64); ++ // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; ++ // carry = (jlong)(tmp4 >>> 64); ++ // z[kdx+idx+1] = (jlong)tmp3; ++ // z[kdx+idx] = (jlong)tmp4; ++ // } ++ // idx += 2; ++ // if (idx > 0) { ++ // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; ++ // z[kdx+idx] = (jlong)yz_idx1; ++ // carry = (jlong)(yz_idx1 >>> 64); ++ // } ++ // + -+ // debugging support -+ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); -+ assert(java_thread == xthread, "unexpected register"); ++ Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; + -+ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); -+ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ srliw(jdx, idx, 2); + -+ // push java thread (becomes first argument of C function) -+ mv(c_rarg0, java_thread); ++ bind(L_third_loop); + -+ // set last Java frame before call -+ assert(last_java_sp != fp, "can't use fp"); ++ subw(jdx, jdx, 1); ++ bltz(jdx, L_third_loop_exit); ++ subw(idx, idx, 4); + -+ Label l; -+ set_last_Java_frame(last_java_sp, fp, l, t0); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(yz_idx2, Address(t0, 0)); ++ ld(yz_idx1, Address(t0, wordSize)); + -+ // do the call, remove parameters -+ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); ++ shadd(tmp6, idx, z, t0, LogBytesPerInt); + -+ // reset last Java frame -+ // Only interpreter should have to clear fp -+ reset_last_Java_frame(true); ++ ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian ++ ror_imm(yz_idx2, yz_idx2, 32); + -+ // C++ interp handles this in the interpreter -+ check_and_handle_popframe(java_thread); -+ check_and_handle_earlyret(java_thread); ++ ld(t1, Address(tmp6, 0)); ++ ld(t0, Address(tmp6, wordSize)); + -+ if (check_exceptions) { -+ // check for pending exceptions (java_thread is set upon return) -+ ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); -+ Label ok; -+ beqz(t0, ok); -+ int32_t offset = 0; -+ la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset); -+ jalr(x0, t0, offset); -+ bind(ok); -+ } ++ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 ++ mulhu(tmp4, product_hi, yz_idx1); + -+ // get oop result if there is one and reset the value in the thread -+ if (oop_result->is_valid()) { -+ get_vm_result(oop_result, java_thread); -+ } -+} ++ ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian ++ ror_imm(t1, t1, 32, tmp); + -+void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { -+ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); -+ sd(zr, Address(java_thread, JavaThread::vm_result_offset())); -+ verify_oop(oop_result, "broken oop in call_VM_base"); -+} ++ mul(tmp, 
product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp ++ mulhu(carry2, product_hi, yz_idx2); + -+void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { -+ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); -+ sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); -+} ++ cad(tmp3, tmp3, carry, carry); ++ adc(tmp4, tmp4, zr, carry); ++ cad(tmp3, tmp3, t0, t0); ++ cadc(tmp4, tmp4, tmp, t0); ++ adc(carry, carry2, zr, t0); ++ cad(tmp4, tmp4, t1, carry2); ++ adc(carry, carry, zr, carry2); + -+void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { -+ assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); -+ assert_different_registers(klass, xthread, tmp); ++ ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian ++ ror_imm(tmp4, tmp4, 32); ++ sd(tmp4, Address(tmp6, 0)); ++ sd(tmp3, Address(tmp6, wordSize)); + -+ Label L_fallthrough, L_tmp; -+ if (L_fast_path == NULL) { -+ L_fast_path = &L_fallthrough; -+ } else if (L_slow_path == NULL) { -+ L_slow_path = &L_fallthrough; -+ } ++ j(L_third_loop); + -+ // Fast path check: class is fully initialized -+ lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); -+ sub(tmp, tmp, InstanceKlass::fully_initialized); -+ beqz(tmp, *L_fast_path); ++ bind(L_third_loop_exit); + -+ // Fast path check: current thread is initializer thread -+ ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); ++ andi(idx, idx, 0x3); ++ beqz(idx, L_post_third_loop_done); + -+ if (L_slow_path == &L_fallthrough) { -+ beq(xthread, tmp, *L_fast_path); -+ bind(*L_slow_path); -+ } else if (L_fast_path == &L_fallthrough) { -+ bne(xthread, tmp, *L_slow_path); -+ bind(*L_fast_path); -+ } else { -+ Unimplemented(); -+ } -+} ++ Label L_check_1; ++ subw(idx, idx, 2); ++ bltz(idx, L_check_1); + -+void MacroAssembler::verify_oop(Register reg, const char* s) { -+ if (!VerifyOops) { return; } ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(yz_idx1, Address(t0, 0)); ++ ror_imm(yz_idx1, yz_idx1, 32); + -+ // Pass register number to verify_oop_subroutine -+ const char* b = NULL; -+ { -+ ResourceMark rm; -+ stringStream ss; -+ ss.print("verify_oop: %s: %s", reg->name(), s); -+ b = code_string(ss.as_string()); -+ } -+ BLOCK_COMMENT("verify_oop {"); ++ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 ++ mulhu(tmp4, product_hi, yz_idx1); + -+ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ ld(yz_idx2, Address(t0, 0)); ++ ror_imm(yz_idx2, yz_idx2, 32, tmp); + -+ mv(c_rarg0, reg); // c_rarg0 : x10 -+ li(t0, (uintptr_t)(address)b); ++ add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); + -+ // call indirectly to solve generation ordering problem -+ int32_t offset = 0; -+ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); -+ ld(t1, Address(t1, offset)); -+ jalr(t1); ++ ror_imm(tmp3, tmp3, 32, tmp); ++ sd(tmp3, Address(t0, 0)); + -+ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ bind(L_check_1); + -+ BLOCK_COMMENT("} verify_oop"); -+} ++ andi(idx, idx, 0x1); ++ subw(idx, idx, 1); ++ bltz(idx, L_post_third_loop_done); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ lwu(tmp4, Address(t0, 0)); ++ mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 ++ mulhu(carry2, tmp4, product_hi); + -+void MacroAssembler::verify_oop_addr(Address addr, const char* s) { -+ if (!VerifyOops) { -+ return; -+ } ++ shadd(t0, idx, z, t0, 
LogBytesPerInt); ++ lwu(tmp4, Address(t0, 0)); + -+ const char* b = NULL; -+ { -+ ResourceMark rm; -+ stringStream ss; -+ ss.print("verify_oop_addr: %s", s); -+ b = code_string(ss.as_string()); -+ } -+ BLOCK_COMMENT("verify_oop_addr {"); ++ add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); + -+ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ sw(tmp3, Address(t0, 0)); + -+ if (addr.uses(sp)) { -+ la(x10, addr); -+ ld(x10, Address(x10, 4 * wordSize)); -+ } else { -+ ld(x10, addr); -+ } ++ slli(t0, carry2, 32); ++ srli(carry, tmp3, 32); ++ orr(carry, carry, t0); + -+ li(t0, (uintptr_t)(address)b); ++ bind(L_post_third_loop_done); ++} + -+ // call indirectly to solve generation ordering problem -+ int32_t offset = 0; -+ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); -+ ld(t1, Address(t1, offset)); -+ jalr(t1); ++/** ++ * Code for BigInteger::multiplyToLen() intrinsic. ++ * ++ * x10: x ++ * x11: xlen ++ * x12: y ++ * x13: ylen ++ * x14: z ++ * x15: zlen ++ * x16: tmp1 ++ * x17: tmp2 ++ * x7: tmp3 ++ * x28: tmp4 ++ * x29: tmp5 ++ * x30: tmp6 ++ * x31: tmp7 ++ */ ++void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, ++ Register z, Register zlen, ++ Register tmp1, Register tmp2, Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, Register product_hi) ++{ ++ assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); + -+ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ const Register idx = tmp1; ++ const Register kdx = tmp2; ++ const Register xstart = tmp3; + -+ BLOCK_COMMENT("} verify_oop_addr"); -+} ++ const Register y_idx = tmp4; ++ const Register carry = tmp5; ++ const Register product = xlen; ++ const Register x_xstart = zlen; // reuse register + -+Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, -+ int extra_slot_offset) { -+ // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
-+ int stackElementSize = Interpreter::stackElementSize; -+ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); -+#ifdef ASSERT -+ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); -+ assert(offset1 - offset == stackElementSize, "correct arithmetic"); -+#endif -+ if (arg_slot.is_constant()) { -+ return Address(esp, arg_slot.as_constant() * stackElementSize + offset); -+ } else { -+ assert_different_registers(t0, arg_slot.as_register()); -+ shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize)); -+ return Address(t0, offset); -+ } -+} ++ mv(idx, ylen); // idx = ylen; ++ mv(kdx, zlen); // kdx = xlen+ylen; ++ mv(carry, zr); // carry = 0; + -+#ifndef PRODUCT -+extern "C" void findpc(intptr_t x); -+#endif ++ Label L_multiply_64_x_64_loop, L_done; + -+void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) -+{ -+ // In order to get locks to work, we need to fake a in_VM state -+ if (ShowMessageBoxOnError) { -+ JavaThread* thread = JavaThread::current(); -+ JavaThreadState saved_state = thread->thread_state(); -+ thread->set_thread_state(_thread_in_vm); -+#ifndef PRODUCT -+ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { -+ ttyLocker ttyl; -+ BytecodeCounter::print(); -+ } -+#endif -+ if (os::message_box(msg, "Execution stopped, print registers?")) { -+ ttyLocker ttyl; -+ tty->print_cr(" pc = 0x%016lx", pc); -+#ifndef PRODUCT -+ tty->cr(); -+ findpc(pc); -+ tty->cr(); -+#endif -+ tty->print_cr(" x0 = 0x%016lx", regs[0]); -+ tty->print_cr(" x1 = 0x%016lx", regs[1]); -+ tty->print_cr(" x2 = 0x%016lx", regs[2]); -+ tty->print_cr(" x3 = 0x%016lx", regs[3]); -+ tty->print_cr(" x4 = 0x%016lx", regs[4]); -+ tty->print_cr(" x5 = 0x%016lx", regs[5]); -+ tty->print_cr(" x6 = 0x%016lx", regs[6]); -+ tty->print_cr(" x7 = 0x%016lx", regs[7]); -+ tty->print_cr(" x8 = 0x%016lx", regs[8]); -+ tty->print_cr(" x9 = 0x%016lx", regs[9]); -+ tty->print_cr("x10 = 0x%016lx", regs[10]); -+ tty->print_cr("x11 = 0x%016lx", regs[11]); -+ tty->print_cr("x12 = 0x%016lx", regs[12]); -+ tty->print_cr("x13 = 0x%016lx", regs[13]); -+ tty->print_cr("x14 = 0x%016lx", regs[14]); -+ tty->print_cr("x15 = 0x%016lx", regs[15]); -+ tty->print_cr("x16 = 0x%016lx", regs[16]); -+ tty->print_cr("x17 = 0x%016lx", regs[17]); -+ tty->print_cr("x18 = 0x%016lx", regs[18]); -+ tty->print_cr("x19 = 0x%016lx", regs[19]); -+ tty->print_cr("x20 = 0x%016lx", regs[20]); -+ tty->print_cr("x21 = 0x%016lx", regs[21]); -+ tty->print_cr("x22 = 0x%016lx", regs[22]); -+ tty->print_cr("x23 = 0x%016lx", regs[23]); -+ tty->print_cr("x24 = 0x%016lx", regs[24]); -+ tty->print_cr("x25 = 0x%016lx", regs[25]); -+ tty->print_cr("x26 = 0x%016lx", regs[26]); -+ tty->print_cr("x27 = 0x%016lx", regs[27]); -+ tty->print_cr("x28 = 0x%016lx", regs[28]); -+ tty->print_cr("x30 = 0x%016lx", regs[30]); -+ tty->print_cr("x31 = 0x%016lx", regs[31]); -+ BREAKPOINT; -+ } -+ } -+ fatal("DEBUG MESSAGE: %s", msg); -+} ++ subw(xstart, xlen, 1); ++ bltz(xstart, L_done); + -+void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) { -+ Label done, not_weak; -+ beqz(value, done); // Use NULL as-is. ++ const Register jdx = tmp1; + -+ // Test for jweak tag. -+ andi(t0, value, JNIHandles::weak_tag_mask); -+ beqz(t0, not_weak); ++ if (AvoidUnalignedAccesses) { ++ // Check if x and y are both 8-byte aligned. ++ orr(t0, xlen, ylen); ++ andi(t0, t0, 0x1); ++ beqz(t0, L_multiply_64_x_64_loop); + -+ // Resolve jweak. 
-+ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, -+ Address(value, -JNIHandles::weak_tag_value), tmp, thread); -+ verify_oop(value); -+ j(done); ++ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+ bind(not_weak); -+ // Resolve (untagged) jobject. -+ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); -+ verify_oop(value); -+ bind(done); -+} ++ Label L_second_loop_unaligned; ++ bind(L_second_loop_unaligned); ++ mv(carry, zr); ++ mv(jdx, ylen); ++ subw(xstart, xstart, 1); ++ bltz(xstart, L_done); ++ sub(sp, sp, 2 * wordSize); ++ sd(z, Address(sp, 0)); ++ sd(zr, Address(sp, wordSize)); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ addi(z, t0, 4); ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ lwu(product, Address(t0, 0)); ++ Label L_third_loop, L_third_loop_exit; + -+void MacroAssembler::stop(const char* msg) { -+ address ip = pc(); -+ pusha(); -+ li(c_rarg0, (uintptr_t)(address)msg); -+ li(c_rarg1, (uintptr_t)(address)ip); -+ mv(c_rarg2, sp); -+ mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); -+ jalr(c_rarg3); -+ ebreak(); -+} ++ blez(jdx, L_third_loop_exit); + -+void MacroAssembler::unimplemented(const char* what) { -+ const char* buf = NULL; -+ { -+ ResourceMark rm; -+ stringStream ss; -+ ss.print("unimplemented: %s", what); -+ buf = code_string(ss.as_string()); ++ bind(L_third_loop); ++ subw(jdx, jdx, 1); ++ shadd(t0, jdx, y, t0, LogBytesPerInt); ++ lwu(t0, Address(t0, 0)); ++ mul(t1, t0, product); ++ add(t0, t1, carry); ++ shadd(tmp6, jdx, z, t1, LogBytesPerInt); ++ lwu(t1, Address(tmp6, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(tmp6, 0)); ++ srli(carry, t0, 32); ++ bgtz(jdx, L_third_loop); ++ ++ bind(L_third_loop_exit); ++ ld(z, Address(sp, 0)); ++ addi(sp, sp, 2 * wordSize); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ ++ j(L_second_loop_unaligned); + } -+ stop(buf); -+} + -+void MacroAssembler::emit_static_call_stub() { -+ // CompiledDirectStaticCall::set_to_interpreted knows the -+ // exact layout of this stub. ++ bind(L_multiply_64_x_64_loop); ++ multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + -+ ifence(); -+ mov_metadata(xmethod, (Metadata*)NULL); ++ Label L_second_loop_aligned; ++ beqz(kdx, L_second_loop_aligned); + -+ // Jump to the entry point of the i2c stub. -+ int32_t offset = 0; -+ movptr_with_offset(t0, 0, offset); -+ jalr(x0, t0, offset); -+} ++ Label L_carry; ++ subw(kdx, kdx, 1); ++ beqz(kdx, L_carry); + -+void MacroAssembler::call_VM_leaf_base(address entry_point, -+ int number_of_arguments, -+ Label *retaddr) { -+ call_native_base(entry_point, retaddr); -+} ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ srli(carry, carry, 32); ++ subw(kdx, kdx, 1); + -+void MacroAssembler::call_native(address entry_point, Register arg_0) { -+ pass_arg0(this, arg_0); -+ call_native_base(entry_point); -+} ++ bind(L_carry); ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+void MacroAssembler::call_native_base(address entry_point, Label *retaddr) { -+ Label E, L; -+ int32_t offset = 0; -+ push_reg(0x80000040, sp); // push << t0 & xmethod >> to sp -+ movptr_with_offset(t0, entry_point, offset); -+ jalr(x1, t0, offset); -+ if (retaddr != NULL) { -+ bind(*retaddr); -+ } -+ pop_reg(0x80000040, sp); // pop << t0 & xmethod >> from sp -+} ++ // Second and third (nested) loops. 
++ // ++ // for (int i = xstart-1; i >= 0; i--) { // Second loop ++ // carry = 0; ++ // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop ++ // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + ++ // (z[k] & LONG_MASK) + carry; ++ // z[k] = (int)product; ++ // carry = product >>> 32; ++ // } ++ // z[i] = (int)carry; ++ // } ++ // ++ // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi + -+void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { -+ call_VM_leaf_base(entry_point, number_of_arguments); -+} ++ bind(L_second_loop_aligned); ++ mv(carry, zr); // carry = 0; ++ mv(jdx, ylen); // j = ystart+1 + -+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { -+ pass_arg0(this, arg_0); -+ call_VM_leaf_base(entry_point, 1); -+} ++ subw(xstart, xstart, 1); // i = xstart-1; ++ bltz(xstart, L_done); + -+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { -+ pass_arg0(this, arg_0); -+ pass_arg1(this, arg_1); -+ call_VM_leaf_base(entry_point, 2); -+} ++ sub(sp, sp, 4 * wordSize); ++ sd(z, Address(sp, 0)); + -+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, -+ Register arg_1, Register arg_2) { -+ pass_arg0(this, arg_0); -+ pass_arg1(this, arg_1); -+ pass_arg2(this, arg_2); -+ call_VM_leaf_base(entry_point, 3); -+} ++ Label L_last_x; ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ addi(z, t0, 4); ++ subw(xstart, xstart, 1); // i = xstart-1; ++ bltz(xstart, L_last_x); + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 1); -+} ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ ld(product_hi, Address(t0, 0)); ++ ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ Label L_third_loop_prologue; ++ bind(L_third_loop_prologue); + -+ assert(arg_0 != c_rarg1, "smashed arg"); -+ pass_arg1(this, arg_1); -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 2); -+} ++ sd(ylen, Address(sp, wordSize)); ++ sd(x, Address(sp, 2 * wordSize)); ++ sd(xstart, Address(sp, 3 * wordSize)); ++ multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, ++ tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); ++ ld(z, Address(sp, 0)); ++ ld(ylen, Address(sp, wordSize)); ++ ld(x, Address(sp, 2 * wordSize)); ++ ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen ++ addi(sp, sp, 4 * wordSize); + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { -+ assert(arg_0 != c_rarg2, "smashed arg"); -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ assert(arg_0 != c_rarg1, "smashed arg"); -+ pass_arg1(this, arg_1); -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 3); -+} ++ addiw(tmp3, xlen, 1); ++ shadd(t0, tmp3, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { -+ assert(arg_0 != c_rarg3, "smashed arg"); -+ assert(arg_1 != c_rarg3, "smashed arg"); -+ assert(arg_2 != c_rarg3, "smashed arg"); -+ pass_arg3(this, arg_3); -+ assert(arg_0 != c_rarg2, "smashed arg"); -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ assert(arg_0 != c_rarg1, "smashed arg"); -+ pass_arg1(this, 
arg_1); -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 4); -+} -+ -+void MacroAssembler::nop() { -+ addi(x0, x0, 0); -+} -+ -+void MacroAssembler::mv(Register Rd, Register Rs) { -+ if (Rd != Rs) { -+ addi(Rd, Rs, 0); -+ } -+} -+ -+void MacroAssembler::notr(Register Rd, Register Rs) { -+ xori(Rd, Rs, -1); -+} -+ -+void MacroAssembler::neg(Register Rd, Register Rs) { -+ sub(Rd, x0, Rs); -+} -+ -+void MacroAssembler::negw(Register Rd, Register Rs) { -+ subw(Rd, x0, Rs); -+} -+ -+void MacroAssembler::sext_w(Register Rd, Register Rs) { -+ addiw(Rd, Rs, 0); -+} -+ -+void MacroAssembler::zext_b(Register Rd, Register Rs) { -+ andi(Rd, Rs, 0xFF); -+} -+ -+void MacroAssembler::seqz(Register Rd, Register Rs) { -+ sltiu(Rd, Rs, 1); -+} ++ subw(tmp3, tmp3, 1); ++ bltz(tmp3, L_done); + -+void MacroAssembler::snez(Register Rd, Register Rs) { -+ sltu(Rd, x0, Rs); -+} ++ srli(carry, carry, 32); ++ shadd(t0, tmp3, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ j(L_second_loop_aligned); + -+void MacroAssembler::sltz(Register Rd, Register Rs) { -+ slt(Rd, Rs, x0); -+} ++ // Next infrequent code is moved outside loops. ++ bind(L_last_x); ++ lwu(product_hi, Address(x, 0)); ++ j(L_third_loop_prologue); + -+void MacroAssembler::sgtz(Register Rd, Register Rs) { -+ slt(Rd, x0, Rs); ++ bind(L_done); +} ++#endif + -+void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) { -+ if (Rd != Rs) { -+ fsgnj_s(Rd, Rs, Rs); ++// Count bits of trailing zero chars from lsb to msb until first non-zero element. ++// For LL case, one byte for one element, so shift 8 bits once, and for other case, ++// shift 16 bits once. ++void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) ++{ ++ if (UseZbb) { ++ assert_different_registers(Rd, Rs, tmp1); ++ int step = isLL ? 8 : 16; ++ ctz(Rd, Rs); ++ andi(tmp1, Rd, step - 1); ++ sub(Rd, Rd, tmp1); ++ return; + } -+} -+ -+void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjx_s(Rd, Rs, Rs); -+} ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ Label Loop; ++ int step = isLL ? 
8 : 16; ++ li(Rd, -step); ++ mv(tmp2, Rs); + -+void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjn_s(Rd, Rs, Rs); ++ bind(Loop); ++ addi(Rd, Rd, step); ++ andi(tmp1, tmp2, ((1 << step) - 1)); ++ srli(tmp2, tmp2, step); ++ beqz(tmp1, Loop); +} + -+void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) { -+ if (Rd != Rs) { -+ fsgnj_d(Rd, Rs, Rs); ++// This instruction reads adjacent 4 bytes from the lower half of source register, ++// inflate into a register, for example: ++// Rs: A7A6A5A4A3A2A1A0 ++// Rd: 00A300A200A100A0 ++void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) ++{ ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ li(tmp1, 0xFF); ++ mv(Rd, zr); ++ for (int i = 0; i <= 3; i++) ++ { ++ andr(tmp2, Rs, tmp1); ++ if (i) { ++ slli(tmp2, tmp2, i * 8); ++ } ++ orr(Rd, Rd, tmp2); ++ if (i != 3) { ++ slli(tmp1, tmp1, 8); ++ } + } +} + -+void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjx_d(Rd, Rs, Rs); -+} -+ -+void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjn_d(Rd, Rs, Rs); -+} -+ -+void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) { -+ vmnand_mm(vd, vs, vs); -+} -+ -+void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) { -+ vnsrl_wx(vd, vs, x0, vm); ++// This instruction reads adjacent 4 bytes from the upper half of source register, ++// inflate into a register, for example: ++// Rs: A7A6A5A4A3A2A1A0 ++// Rd: 00A700A600A500A4 ++void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) ++{ ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ li(tmp1, 0xFF00000000); ++ mv(Rd, zr); ++ for (int i = 0; i <= 3; i++) ++ { ++ andr(tmp2, Rs, tmp1); ++ orr(Rd, Rd, tmp2); ++ srli(Rd, Rd, 8); ++ if (i != 3) { ++ slli(tmp1, tmp1, 8); ++ } ++ } +} + -+void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { -+ vfsgnjn_vv(vd, vs, vs); -+} ++// The size of the blocks erased by the zero_blocks stub. We must ++// handle anything smaller than this ourselves in zero_words(). ++const int MacroAssembler::zero_words_block_size = 8; + -+void MacroAssembler::la(Register Rd, const address &dest) { -+ int64_t offset = dest - pc(); -+ if (is_offset_in_range(offset, 32)) { -+ auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit -+ addi(Rd, Rd, ((int64_t)offset << 52) >> 52); -+ } else { -+ movptr(Rd, dest); -+ } -+} ++// zero_words() is used by C2 ClearArray patterns. It is as small as ++// possible, handling small word counts locally and delegating ++// anything larger to the zero_blocks stub. It is expanded many times ++// in compiled code, so it is important to keep it short. + -+void MacroAssembler::la(Register Rd, const Address &adr) { -+ InstructionMark im(this); -+ code_section()->relocate(inst_mark(), adr.rspec()); -+ relocInfo::relocType rtype = adr.rspec().reloc()->type(); ++// ptr: Address of a buffer to be zeroed. ++// cnt: Count in HeapWords. ++// ++// ptr, cnt, and t0 are clobbered. 
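++//
++// Returns the pc after the emitted code, or NULL if the trampoline call to
++// the zero_blocks stub could not be emitted.
++//
++// Rough flow, assuming zero_blocks clears whole 8-word blocks and leaves the
++// low bits of cnt for the in-line tail: e.g. for cnt == 13 the stub clears
++// 8 words, then the tail below tests bits 4, 2 and 1 of cnt and stores the
++// remaining 4 + 1 words with sd(zr, ...).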
++address MacroAssembler::zero_words(Register ptr, Register cnt) ++{ ++ assert(is_power_of_2(zero_words_block_size), "adjust this"); ++ assert(ptr == x28 && cnt == x29, "mismatch in register usage"); ++ assert_different_registers(cnt, t0); + -+ switch (adr.getMode()) { -+ case Address::literal: { -+ if (rtype == relocInfo::none) { -+ li(Rd, (intptr_t)(adr.target())); -+ } else { -+ movptr(Rd, adr.target()); ++ BLOCK_COMMENT("zero_words {"); ++ mv(t0, zero_words_block_size); ++ Label around, done, done16; ++ bltu(cnt, t0, around); ++ { ++ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); ++ assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); ++ if (StubRoutines::riscv::complete()) { ++ address tpc = trampoline_call(zero_blocks); ++ if (tpc == NULL) { ++ DEBUG_ONLY(reset_labels1(around)); ++ postcond(pc() == badAddress); ++ return NULL; + } -+ break; ++ } else { ++ jal(zero_blocks); + } -+ case Address::base_plus_offset: { -+ int32_t offset = 0; -+ baseOffset(Rd, adr, offset); -+ addi(Rd, Rd, offset); -+ break; ++ } ++ bind(around); ++ for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { ++ Label l; ++ andi(t0, cnt, i); ++ beqz(t0, l); ++ for (int j = 0; j < i; j++) { ++ sd(zr, Address(ptr, 0)); ++ addi(ptr, ptr, 8); + } -+ default: -+ ShouldNotReachHere(); ++ bind(l); + } ++ { ++ Label l; ++ andi(t0, cnt, 1); ++ beqz(t0, l); ++ sd(zr, Address(ptr, 0)); ++ bind(l); ++ } ++ BLOCK_COMMENT("} zero_words"); ++ postcond(pc() != badAddress); ++ return pc(); +} + -+void MacroAssembler::la(Register Rd, Label &label) { -+ la(Rd, target(label)); -+} -+ -+#define INSN(NAME) \ -+ void MacroAssembler::NAME##z(Register Rs, const address &dest) { \ -+ NAME(Rs, zr, dest); \ -+ } \ -+ void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ -+ NAME(Rs, zr, l, is_far); \ -+ } \ ++#define SmallArraySize (18 * BytesPerLong) + -+ INSN(beq); -+ INSN(bne); -+ INSN(blt); -+ INSN(ble); -+ INSN(bge); -+ INSN(bgt); ++// base: Address of a buffer to be zeroed, 8 bytes aligned. ++// cnt: Immediate count in HeapWords. 
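++//
++// Counts up to SmallArraySize / BytesPerLong (18) words are fully unrolled;
++// larger counts first store cnt % 8 words, then loop clearing 8 words per
++// iteration. For example, cnt == 20 emits 4 leading stores followed by a
++// loop that runs twice.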
++void MacroAssembler::zero_words(Register base, u_int64_t cnt) ++{ ++ assert_different_registers(base, t0, t1); + -+#undef INSN ++ BLOCK_COMMENT("zero_words {"); + -+// Float compare branch instructions ++ if (cnt <= SmallArraySize / BytesPerLong) { ++ for (int i = 0; i < (int)cnt; i++) { ++ sd(zr, Address(base, i * wordSize)); ++ } ++ } else { ++ const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll ++ int remainder = cnt % unroll; ++ for (int i = 0; i < remainder; i++) { ++ sd(zr, Address(base, i * wordSize)); ++ } + -+#define INSN(NAME, FLOATCMP, BRANCH) \ -+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ -+ FLOATCMP##_s(t0, Rs1, Rs2); \ -+ BRANCH(t0, l, is_far); \ -+ } \ -+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ -+ FLOATCMP##_d(t0, Rs1, Rs2); \ -+ BRANCH(t0, l, is_far); \ ++ Label loop; ++ Register cnt_reg = t0; ++ Register loop_base = t1; ++ cnt = cnt - remainder; ++ li(cnt_reg, cnt); ++ add(loop_base, base, remainder * wordSize); ++ bind(loop); ++ sub(cnt_reg, cnt_reg, unroll); ++ for (int i = 0; i < unroll; i++) { ++ sd(zr, Address(loop_base, i * wordSize)); ++ } ++ add(loop_base, loop_base, unroll * wordSize); ++ bnez(cnt_reg, loop); + } + -+ INSN(beq, feq, bnez); -+ INSN(bne, feq, beqz); ++ BLOCK_COMMENT("} zero_words"); ++} + -+#undef INSN ++// base: Address of a buffer to be filled, 8 bytes aligned. ++// cnt: Count in 8-byte unit. ++// value: Value to be filled with. ++// base will point to the end of the buffer after filling. ++void MacroAssembler::fill_words(Register base, Register cnt, Register value) ++{ ++// Algorithm: ++// ++// t0 = cnt & 7 ++// cnt -= t0 ++// p += t0 ++// switch (t0): ++// switch start: ++// do while cnt ++// cnt -= 8 ++// p[-8] = value ++// case 7: ++// p[-7] = value ++// case 6: ++// p[-6] = value ++// // ... ++// case 1: ++// p[-1] = value ++// case 0: ++// p += 8 ++// do-while end ++// switch end + ++ assert_different_registers(base, cnt, value, t0, t1); + -+#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ -+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ if (is_unordered) { \ -+ /* jump if either source is NaN or condition is expected */ \ -+ FLOATCMP2##_s(t0, Rs2, Rs1); \ -+ beqz(t0, l, is_far); \ -+ } else { \ -+ /* jump if no NaN in source and condition is expected */ \ -+ FLOATCMP1##_s(t0, Rs1, Rs2); \ -+ bnez(t0, l, is_far); \ -+ } \ -+ } \ -+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ if (is_unordered) { \ -+ /* jump if either source is NaN or condition is expected */ \ -+ FLOATCMP2##_d(t0, Rs2, Rs1); \ -+ beqz(t0, l, is_far); \ -+ } else { \ -+ /* jump if no NaN in source and condition is expected */ \ -+ FLOATCMP1##_d(t0, Rs1, Rs2); \ -+ bnez(t0, l, is_far); \ -+ } \ -+ } ++ Label fini, skip, entry, loop; ++ const int unroll = 8; // Number of sd instructions we'll unroll + -+ INSN(ble, fle, flt); -+ INSN(blt, flt, fle); ++ beqz(cnt, fini); + -+#undef INSN ++ andi(t0, cnt, unroll - 1); ++ sub(cnt, cnt, t0); ++ // align 8, so first sd n % 8 = mod, next loop sd 8 * n. 
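++  // That is: base is pre-advanced by (cnt % 8) * 8 bytes and the jr below
++  // lands (cnt % 8) sd instructions before 'entry' (each sd is assumed to be
++  // emitted as a 4-byte, uncompressed instruction), so the first partial pass
++  // stores exactly the leading cnt % 8 words; every later pass stores 8.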
++ shadd(base, t0, base, t1, 3); ++ la(t1, entry); ++ slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) ++ sub(t1, t1, t0); ++ jr(t1); + -+#define INSN(NAME, CMP) \ -+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ -+ } \ -+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ ++ bind(loop); ++ add(base, base, unroll * 8); ++ for (int i = -unroll; i < 0; i++) { ++ sd(value, Address(base, i * 8)); + } ++ bind(entry); ++ sub(cnt, cnt, unroll); ++ bgez(cnt, loop); + -+ INSN(bgt, blt); -+ INSN(bge, ble); -+ -+#undef INSN -+ ++ bind(fini); ++} + -+#define INSN(NAME, CSR) \ -+ void MacroAssembler::NAME(Register Rd) { \ -+ csrr(Rd, CSR); \ -+ } ++#define FCVT_SAFE(FLOATCVT, FLOATEQ) \ ++void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ ++ Label L_Okay; \ ++ fscsr(zr); \ ++ FLOATCVT(dst, src); \ ++ frcsr(tmp); \ ++ andi(tmp, tmp, 0x1E); \ ++ beqz(tmp, L_Okay); \ ++ FLOATEQ(tmp, src, src); \ ++ bnez(tmp, L_Okay); \ ++ mv(dst, zr); \ ++ bind(L_Okay); \ ++} + -+ INSN(rdinstret, CSR_INSTERT); -+ INSN(rdcycle, CSR_CYCLE); -+ INSN(rdtime, CSR_TIME); -+ INSN(frcsr, CSR_FCSR); -+ INSN(frrm, CSR_FRM); -+ INSN(frflags, CSR_FFLAGS); ++FCVT_SAFE(fcvt_w_s, feq_s) ++FCVT_SAFE(fcvt_l_s, feq_s) ++FCVT_SAFE(fcvt_w_d, feq_d) ++FCVT_SAFE(fcvt_l_d, feq_d) + -+#undef INSN ++#undef FCVT_SAFE + -+void MacroAssembler::csrr(Register Rd, unsigned csr) { -+ csrrs(Rd, csr, x0); ++#define FCMP(FLOATTYPE, FLOATSIG) \ ++void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ ++ FloatRegister Rs2, int unordered_result) { \ ++ Label Ldone; \ ++ if (unordered_result < 0) { \ ++ /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ ++ /* installs 1 if gt else 0 */ \ ++ flt_##FLOATSIG(result, Rs2, Rs1); \ ++ /* Rs1 > Rs2, install 1 */ \ ++ bgtz(result, Ldone); \ ++ feq_##FLOATSIG(result, Rs1, Rs2); \ ++ addi(result, result, -1); \ ++ /* Rs1 = Rs2, install 0 */ \ ++ /* NaN or Rs1 < Rs2, install -1 */ \ ++ bind(Ldone); \ ++ } else { \ ++ /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. 
*/ \ ++ /* installs 1 if gt or unordered else 0 */ \ ++ flt_##FLOATSIG(result, Rs1, Rs2); \ ++ /* Rs1 < Rs2, install -1 */ \ ++ bgtz(result, Ldone); \ ++ feq_##FLOATSIG(result, Rs1, Rs2); \ ++ addi(result, result, -1); \ ++ /* Rs1 = Rs2, install 0 */ \ ++ /* NaN or Rs1 > Rs2, install 1 */ \ ++ bind(Ldone); \ ++ neg(result, result); \ ++ } \ +} + -+#define INSN(NAME, OPFUN) \ -+ void MacroAssembler::NAME(unsigned csr, Register Rs) { \ -+ OPFUN(x0, csr, Rs); \ -+ } ++FCMP(float, s); ++FCMP(double, d); + -+ INSN(csrw, csrrw); -+ INSN(csrs, csrrs); -+ INSN(csrc, csrrc); ++#undef FCMP + -+#undef INSN ++// Zero words; len is in bytes ++// Destroys all registers except addr ++// len must be a nonzero multiple of wordSize ++void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { ++ assert_different_registers(addr, len, tmp, t0, t1); + -+#define INSN(NAME, OPFUN) \ -+ void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ -+ OPFUN(x0, csr, imm); \ ++#ifdef ASSERT ++ { ++ Label L; ++ andi(t0, len, BytesPerWord - 1); ++ beqz(t0, L); ++ stop("len is not a multiple of BytesPerWord"); ++ bind(L); + } ++#endif // ASSERT + -+ INSN(csrwi, csrrwi); -+ INSN(csrsi, csrrsi); -+ INSN(csrci, csrrci); -+ -+#undef INSN -+ -+#define INSN(NAME, CSR) \ -+ void MacroAssembler::NAME(Register Rd, Register Rs) { \ -+ csrrw(Rd, CSR, Rs); \ -+ } ++#ifndef PRODUCT ++ block_comment("zero memory"); ++#endif // PRODUCT + -+ INSN(fscsr, CSR_FCSR); -+ INSN(fsrm, CSR_FRM); -+ INSN(fsflags, CSR_FFLAGS); ++ Label loop; ++ Label entry; + -+#undef INSN ++ // Algorithm: ++ // ++ // t0 = cnt & 7 ++ // cnt -= t0 ++ // p += t0 ++ // switch (t0) { ++ // do { ++ // cnt -= 8 ++ // p[-8] = 0 ++ // case 7: ++ // p[-7] = 0 ++ // case 6: ++ // p[-6] = 0 ++ // ... ++ // case 1: ++ // p[-1] = 0 ++ // case 0: ++ // p += 8 ++ // } while (cnt) ++ // } + -+#define INSN(NAME) \ -+ void MacroAssembler::NAME(Register Rs) { \ -+ NAME(x0, Rs); \ -+ } ++ const int unroll = 8; // Number of sd(zr) instructions we'll unroll + -+ INSN(fscsr); -+ INSN(fsrm); -+ INSN(fsflags); -+ -+#undef INSN -+ -+void MacroAssembler::fsrmi(Register Rd, unsigned imm) { -+ guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register"); -+ csrrwi(Rd, CSR_FRM, imm); -+} -+ -+void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { -+ csrrwi(Rd, CSR_FFLAGS, imm); ++ srli(len, len, LogBytesPerWord); ++ andi(t0, len, unroll - 1); // t0 = cnt % unroll ++ sub(len, len, t0); // cnt -= unroll ++ // tmp always points to the end of the region we're about to zero ++ shadd(tmp, t0, addr, t1, LogBytesPerWord); ++ la(t1, entry); ++ slli(t0, t0, 2); ++ sub(t1, t1, t0); ++ jr(t1); ++ bind(loop); ++ sub(len, len, unroll); ++ for (int i = -unroll; i < 0; i++) { ++ Assembler::sd(zr, Address(tmp, i * wordSize)); ++ } ++ bind(entry); ++ add(tmp, tmp, unroll * wordSize); ++ bnez(len, loop); +} + -+#define INSN(NAME) \ -+ void MacroAssembler::NAME(unsigned imm) { \ -+ NAME(x0, imm); \ ++// shift left by shamt and add ++// Rd = (Rs1 << shamt) + Rs2 ++void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { ++ if (UseZba) { ++ if (shamt == 1) { ++ sh1add(Rd, Rs1, Rs2); ++ return; ++ } else if (shamt == 2) { ++ sh2add(Rd, Rs1, Rs2); ++ return; ++ } else if (shamt == 3) { ++ sh3add(Rd, Rs1, Rs2); ++ return; ++ } + } + -+ INSN(fsrmi); -+ INSN(fsflagsi); ++ if (shamt != 0) { ++ slli(tmp, Rs1, shamt); ++ add(Rd, Rs2, tmp); ++ } else { ++ add(Rd, Rs1, Rs2); ++ } ++} + -+#undef INSN ++void MacroAssembler::zero_extend(Register dst, 
Register src, int bits) { ++ if (UseZba && bits == 32) { ++ zext_w(dst, src); ++ return; ++ } + -+void MacroAssembler::push_reg(Register Rs) -+{ -+ addi(esp, esp, 0 - wordSize); -+ sd(Rs, Address(esp, 0)); -+} ++ if (UseZbb && bits == 16) { ++ zext_h(dst, src); ++ return; ++ } + -+void MacroAssembler::pop_reg(Register Rd) -+{ -+ ld(Rd, esp, 0); -+ addi(esp, esp, wordSize); ++ if (bits == 8) { ++ zext_b(dst, src); ++ } else { ++ slli(dst, src, XLEN - bits); ++ srli(dst, dst, XLEN - bits); ++ } +} + -+int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { -+ int count = 0; -+ // Scan bitset to accumulate register pairs -+ for (int reg = 31; reg >= 0; reg--) { -+ if ((1U << 31) & bitset) { -+ regs[count++] = reg; ++void MacroAssembler::sign_extend(Register dst, Register src, int bits) { ++ if (UseZbb) { ++ if (bits == 8) { ++ sext_b(dst, src); ++ return; ++ } else if (bits == 16) { ++ sext_h(dst, src); ++ return; + } -+ bitset <<= 1; + } -+ return count; -+} -+ -+// Push lots of registers in the bit set supplied. Don't push sp. -+// Return the number of words pushed -+int MacroAssembler::push_reg(unsigned int bitset, Register stack) { -+ DEBUG_ONLY(int words_pushed = 0;) -+ CompressibleRegion cr(this); + -+ unsigned char regs[32]; -+ int count = bitset_to_regs(bitset, regs); -+ // reserve one slot to align for odd count -+ int offset = is_even(count) ? 0 : wordSize; ++ if (bits == 32) { ++ sext_w(dst, src); ++ } else { ++ slli(dst, src, XLEN - bits); ++ srai(dst, dst, XLEN - bits); ++ } ++} + -+ if (count) { -+ addi(stack, stack, - count * wordSize - offset); ++void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) ++{ ++ if (src1 == src2) { ++ mv(dst, zr); ++ return; + } -+ for (int i = count - 1; i >= 0; i--) { -+ sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); -+ DEBUG_ONLY(words_pushed ++;) ++ Label done; ++ Register left = src1; ++ Register right = src2; ++ if (dst == src1) { ++ assert_different_registers(dst, src2, tmp); ++ mv(tmp, src1); ++ left = tmp; ++ } else if (dst == src2) { ++ assert_different_registers(dst, src1, tmp); ++ mv(tmp, src2); ++ right = tmp; + } + -+ assert(words_pushed == count, "oops, pushed != count"); -+ -+ return count; ++ // installs 1 if gt else 0 ++ slt(dst, right, left); ++ bnez(dst, done); ++ slt(dst, left, right); ++ // dst = -1 if lt; else if eq , dst = 0 ++ neg(dst, dst); ++ bind(done); +} + -+int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { -+ DEBUG_ONLY(int words_popped = 0;) -+ CompressibleRegion cr(this); -+ -+ unsigned char regs[32]; -+ int count = bitset_to_regs(bitset, regs); -+ // reserve one slot to align for odd count -+ int offset = is_even(count) ? 
0 : wordSize; ++#ifdef COMPILER2 ++// short string ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL) ++{ ++ Register ch1 = t0; ++ Register index = t1; + -+ for (int i = count - 1; i >= 0; i--) { -+ ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); -+ DEBUG_ONLY(words_popped ++;) -+ } ++ BLOCK_COMMENT("string_indexof_char_short {"); + -+ if (count) { -+ addi(stack, stack, count * wordSize + offset); -+ } -+ assert(words_popped == count, "oops, popped != count"); ++ Label LOOP, LOOP1, LOOP4, LOOP8; ++ Label MATCH, MATCH1, MATCH2, MATCH3, ++ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; + -+ return count; -+} ++ mv(result, -1); ++ mv(index, zr); + -+// Push float registers in the bitset, except sp. -+// Return the number of heapwords pushed. -+int MacroAssembler::push_fp(unsigned int bitset, Register stack) { -+ CompressibleRegion cr(this); -+ int words_pushed = 0; -+ unsigned char regs[32]; -+ int count = bitset_to_regs(bitset, regs); -+ int push_slots = count + (count & 1); ++ bind(LOOP); ++ addi(t0, index, 8); ++ ble(t0, cnt1, LOOP8); ++ addi(t0, index, 4); ++ ble(t0, cnt1, LOOP4); ++ j(LOOP1); + -+ if (count) { -+ addi(stack, stack, -push_slots * wordSize); -+ } ++ bind(LOOP8); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); ++ beq(ch, ch1, MATCH4); ++ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); ++ beq(ch, ch1, MATCH5); ++ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); ++ beq(ch, ch1, MATCH6); ++ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); ++ beq(ch, ch1, MATCH7); ++ addi(index, index, 8); ++ addi(str1, str1, isL ? 8 : 16); ++ blt(index, cnt1, LOOP); ++ j(NOMATCH); + -+ for (int i = count - 1; i >= 0; i--) { -+ fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize)); -+ words_pushed++; -+ } ++ bind(LOOP4); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ addi(index, index, 4); ++ addi(str1, str1, isL ? 4 : 8); ++ bge(index, cnt1, NOMATCH); + -+ assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); -+ return count; -+} ++ bind(LOOP1); ++ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); ++ beq(ch, ch1, MATCH); ++ addi(index, index, 1); ++ addi(str1, str1, isL ? 
1 : 2); ++ blt(index, cnt1, LOOP1); ++ j(NOMATCH); + -+int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { -+ CompressibleRegion cr(this); -+ int words_popped = 0; -+ unsigned char regs[32]; -+ int count = bitset_to_regs(bitset, regs); -+ int pop_slots = count + (count & 1); ++ bind(MATCH1); ++ addi(index, index, 1); ++ j(MATCH); + -+ for (int i = count - 1; i >= 0; i--) { -+ fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize)); -+ words_popped++; -+ } ++ bind(MATCH2); ++ addi(index, index, 2); ++ j(MATCH); + -+ if (count) { -+ addi(stack, stack, pop_slots * wordSize); -+ } ++ bind(MATCH3); ++ addi(index, index, 3); ++ j(MATCH); + -+ assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count); -+ return count; -+} ++ bind(MATCH4); ++ addi(index, index, 4); ++ j(MATCH); + -+#ifdef COMPILER2 -+int MacroAssembler::push_vp(unsigned int bitset, Register stack) { -+ CompressibleRegion cr(this); -+ int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); ++ bind(MATCH5); ++ addi(index, index, 5); ++ j(MATCH); + -+ // Scan bitset to accumulate register pairs -+ unsigned char regs[32]; -+ int count = 0; -+ for (int reg = 31; reg >= 0; reg--) { -+ if ((1U << 31) & bitset) { -+ regs[count++] = reg; -+ } -+ bitset <<= 1; -+ } ++ bind(MATCH6); ++ addi(index, index, 6); ++ j(MATCH); + -+ for (int i = 0; i < count; i++) { -+ sub(stack, stack, vector_size_in_bytes); -+ vs1r_v(as_VectorRegister(regs[i]), stack); -+ } ++ bind(MATCH7); ++ addi(index, index, 7); + -+ return count * vector_size_in_bytes / wordSize; ++ bind(MATCH); ++ mv(result, index); ++ bind(NOMATCH); ++ BLOCK_COMMENT("} string_indexof_char_short"); +} + -+int MacroAssembler::pop_vp(unsigned int bitset, Register stack) { -+ CompressibleRegion cr(this); -+ int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL) ++{ ++ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; ++ Register ch1 = t0; ++ Register orig_cnt = t1; ++ Register mask1 = tmp3; ++ Register mask2 = tmp2; ++ Register match_mask = tmp1; ++ Register trailing_char = tmp4; ++ Register unaligned_elems = tmp4; + -+ // Scan bitset to accumulate register pairs -+ unsigned char regs[32]; -+ int count = 0; -+ for (int reg = 31; reg >= 0; reg--) { -+ if ((1U << 31) & bitset) { -+ regs[count++] = reg; ++ BLOCK_COMMENT("string_indexof_char {"); ++ beqz(cnt1, NOMATCH); ++ ++ addi(t0, cnt1, isL ? 
-32 : -16); ++ bgtz(t0, DO_LONG); ++ string_indexof_char_short(str1, cnt1, ch, result, isL); ++ j(DONE); ++ ++ bind(DO_LONG); ++ mv(orig_cnt, cnt1); ++ if (AvoidUnalignedAccesses) { ++ Label ALIGNED; ++ andi(unaligned_elems, str1, 0x7); ++ beqz(unaligned_elems, ALIGNED); ++ sub(unaligned_elems, unaligned_elems, 8); ++ neg(unaligned_elems, unaligned_elems); ++ if (!isL) { ++ srli(unaligned_elems, unaligned_elems, 1); + } -+ bitset <<= 1; ++ // do unaligned part per element ++ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); ++ bgez(result, DONE); ++ mv(orig_cnt, cnt1); ++ sub(cnt1, cnt1, unaligned_elems); ++ bind(ALIGNED); + } + -+ for (int i = count - 1; i >= 0; i--) { -+ vl1r_v(as_VectorRegister(regs[i]), stack); -+ add(stack, stack, vector_size_in_bytes); ++ // duplicate ch ++ if (isL) { ++ slli(ch1, ch, 8); ++ orr(ch, ch1, ch); + } ++ slli(ch1, ch, 16); ++ orr(ch, ch1, ch); ++ slli(ch1, ch, 32); ++ orr(ch, ch1, ch); + -+ return count * vector_size_in_bytes / wordSize; -+} -+#endif // COMPILER2 ++ if (!isL) { ++ slli(cnt1, cnt1, 1); ++ } + -+void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { -+ CompressibleRegion cr(this); -+ // Push integer registers x7, x10-x17, x28-x31. -+ push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ mv(mask1, isL ? mask0101 : mask0001); ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ mv(mask2, isL ? mask7f7f : mask7fff); + -+ // Push float registers f0-f7, f10-f17, f28-f31. -+ addi(sp, sp, - wordSize * 20); -+ int offset = 0; -+ for (int i = 0; i < 32; i++) { -+ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { -+ fsd(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); -+ } -+ } -+} ++ bind(CH1_LOOP); ++ ld(ch1, Address(str1)); ++ addi(str1, str1, 8); ++ addi(cnt1, cnt1, -8); ++ compute_match_mask(ch1, ch, match_mask, mask1, mask2); ++ bnez(match_mask, HIT); ++ bgtz(cnt1, CH1_LOOP); ++ j(NOMATCH); + -+void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { -+ CompressibleRegion cr(this); -+ int offset = 0; -+ for (int i = 0; i < 32; i++) { -+ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { -+ fld(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); -+ } ++ bind(HIT); ++ ctzc_bit(trailing_char, match_mask, isL, ch1, result); ++ srli(trailing_char, trailing_char, 3); ++ addi(cnt1, cnt1, 8); ++ ble(cnt1, trailing_char, NOMATCH); ++ // match case ++ if (!isL) { ++ srli(cnt1, cnt1, 1); ++ srli(trailing_char, trailing_char, 1); + } -+ addi(sp, sp, wordSize * 20); + -+ pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); -+} ++ sub(result, orig_cnt, cnt1); ++ add(result, result, trailing_char); ++ j(DONE); + -+// Push all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). -+void MacroAssembler::pusha() { -+ CompressibleRegion cr(this); -+ push_reg(0xffffffe2, sp); -+} ++ bind(NOMATCH); ++ mv(result, -1); + -+// Pop all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). 
-+void MacroAssembler::popa() { -+ CompressibleRegion cr(this); -+ pop_reg(0xffffffe2, sp); ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof_char"); +} + -+void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { -+ CompressibleRegion cr(this); -+ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) -+ push_reg(0xffffffe0, sp); ++typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); + -+ // float registers -+ addi(sp, sp, - 32 * wordSize); -+ for (int i = 0; i < 32; i++) { -+ fsd(as_FloatRegister(i), Address(sp, i * wordSize)); -+ } ++// Search for needle in haystack and return index or -1 ++// x10: result ++// x11: haystack ++// x12: haystack_len ++// x13: needle ++// x14: needle_len ++void MacroAssembler::string_indexof(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae) ++{ ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + -+ // vector registers -+ if (save_vectors) { -+ sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers); -+ vsetvli(t0, x0, Assembler::e64, Assembler::m8); -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -+ add(t0, sp, vector_size_in_bytes * i); -+ vse64_v(as_VectorRegister(i), t0); -+ } -+ } -+} ++ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; + -+void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { -+ CompressibleRegion cr(this); -+ // vector registers -+ if (restore_vectors) { -+ vsetvli(t0, x0, Assembler::e64, Assembler::m8); -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -+ vle64_v(as_VectorRegister(i), sp); -+ add(sp, sp, vector_size_in_bytes * 8); -+ } -+ } ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register nlen_tmp = tmp1; // needle len tmp ++ Register hlen_tmp = tmp2; // haystack len tmp ++ Register result_tmp = tmp4; + -+ // float registers -+ for (int i = 0; i < 32; i++) { -+ fld(as_FloatRegister(i), Address(sp, i * wordSize)); -+ } -+ addi(sp, sp, 32 * wordSize); ++ bool isLL = ae == StrIntrinsicNode::LL; + -+ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) -+ pop_reg(0xffffffe0, sp); -+} ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? 
(load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; + -+static int patch_offset_in_jal(address branch, int64_t offset) { -+ assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction!\n"); -+ Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31] -+ Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21] -+ Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20] -+ Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12] -+ return NativeInstruction::instruction_size; // only one instruction -+} ++ BLOCK_COMMENT("string_indexof {"); + -+static int patch_offset_in_conditional_branch(address branch, int64_t offset) { -+ assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne insrusction!\n"); -+ Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31] -+ Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25] -+ Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7] -+ Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] -+ return NativeInstruction::instruction_size; // only one instruction -+} ++ // Note, inline_string_indexOf() generates checks: ++ // if (pattern.count > src.count) return -1; ++ // if (pattern.count == 0) return 0; + -+static int patch_offset_in_pc_relative(address branch, int64_t offset) { -+ const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load -+ Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12] -+ Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20] -+ return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size; -+} ++ // We have two strings, a source string in haystack, haystack_len and a pattern string ++ // in needle, needle_len. Find the first occurence of pattern in source or return -1. + -+static int patch_addr_in_movptr(address branch, address target) { -+ const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load -+ int32_t lower = ((intptr_t)target << 36) >> 36; -+ int64_t upper = ((intptr_t)target - lower) >> 28; -+ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[47:28] + target[27] ==> branch[31:12] -+ Assembler::patch(branch + 4, 31, 20, (lower >> 16) & 0xfff); // Addi. target[27:16] ==> branch[31:20] -+ Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff); // Addi. target[15: 5] ==> branch[31:20] -+ Assembler::patch(branch + 20, 31, 20, lower & 0x1f); // Addi/Jalr/Load. target[ 4: 0] ==> branch[31:20] -+ return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; -+} ++ // For larger pattern and source we use a simplified Boyer Moore algorithm. ++ // With a small pattern and source we use linear scan. 
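++  // For example, a 12-character needle in a 4000-character haystack takes the
++  // Boyer-Moore-Horspool path below, while a needle shorter than 8 characters
++  // always goes through the linear scan.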
+ -+static int patch_imm_in_li64(address branch, address target) { -+ const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi -+ int64_t lower = (intptr_t)target & 0xffffffff; -+ lower = lower - ((lower << 44) >> 44); -+ int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower; -+ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; -+ int64_t tmp_upper = upper, tmp_lower = upper; -+ tmp_lower = (tmp_lower << 52) >> 52; -+ tmp_upper -= tmp_lower; -+ tmp_upper >>= 12; -+ // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:28] == 0x7ff && target[19] == 1), -+ // upper = target[63:32] + 1. -+ Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui. -+ Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi. -+ // Load the rest 32 bits. -+ Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi. -+ Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi. -+ Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi. -+ return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; -+} ++ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. ++ sub(result_tmp, haystack_len, needle_len); ++ // needle_len < 8, use linear scan ++ sub(t0, needle_len, 8); ++ bltz(t0, LINEARSEARCH); ++ // needle_len >= 256, use linear scan ++ sub(t0, needle_len, 256); ++ bgez(t0, LINEARSTUB); ++ // needle_len >= haystack_len/4, use linear scan ++ srli(t0, haystack_len, 2); ++ bge(needle_len, t0, LINEARSTUB); + -+static int patch_imm_in_li32(address branch, int32_t target) { -+ const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw -+ int64_t upper = (intptr_t)target; -+ int32_t lower = (((int32_t)target) << 20) >> 20; -+ upper -= lower; -+ upper = (int32_t)upper; -+ Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui. -+ Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw. -+ return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; -+} ++ // Boyer-Moore-Horspool introduction: ++ // The Boyer Moore alogorithm is based on the description here:- ++ // ++ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm ++ // ++ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule ++ // and the 'Good Suffix' rule. ++ // ++ // These rules are essentially heuristics for how far we can shift the ++ // pattern along the search string. ++ // ++ // The implementation here uses the 'Bad Character' rule only because of the ++ // complexity of initialisation for the 'Good Suffix' rule. 
++ // ++ // This is also known as the Boyer-Moore-Horspool algorithm: ++ // ++ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm ++ // ++ // #define ASIZE 256 ++ // ++ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { ++ // int i, j; ++ // unsigned c; ++ // unsigned char bc[ASIZE]; ++ // ++ // /* Preprocessing */ ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ // ++ // /* Searching */ ++ // j = 0; ++ // while (j <= n - m) { ++ // c = src[i+j]; ++ // if (pattern[m-1] == c) ++ // int k; ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // if (k < 0) return j; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 ++ // // LL case: (c< 256) always true. Remove branch ++ // j += bc[pattern[j+m-1]]; ++ // #endif ++ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF ++ // // UU case: need if (c if not. ++ // if (c < ASIZE) ++ // j += bc[pattern[j+m-1]]; ++ // else ++ // j += m ++ // #endif ++ // } ++ // return -1; ++ // } + -+static long get_offset_of_jal(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ long offset = 0; -+ unsigned insn = *(unsigned*)insn_addr; -+ long val = (long)Assembler::sextract(insn, 31, 12); -+ offset |= ((val >> 19) & 0x1) << 20; -+ offset |= (val & 0xff) << 12; -+ offset |= ((val >> 8) & 0x1) << 11; -+ offset |= ((val >> 9) & 0x3ff) << 1; -+ offset = (offset << 43) >> 43; -+ return offset; -+} ++ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result ++ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, ++ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; + -+static long get_offset_of_conditional_branch(address insn_addr) { -+ long offset = 0; -+ assert_cond(insn_addr != NULL); -+ unsigned insn = *(unsigned*)insn_addr; -+ offset = (long)Assembler::sextract(insn, 31, 31); -+ offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); -+ offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); -+ offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); -+ offset = (offset << 41) >> 41; -+ return offset; -+} ++ Register haystack_end = haystack_len; ++ Register skipch = tmp2; + -+static long get_offset_of_pc_relative(address insn_addr) { -+ long offset = 0; -+ assert_cond(insn_addr != NULL); -+ offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12; // Auipc. -+ offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addi/Jalr/Load. -+ offset = (offset << 32) >> 32; -+ return offset; -+} ++ // pattern length is >=8, so, we can read at least 1 register for cases when ++ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for ++ // UL case. We'll re-read last character in inner pre-loop code to have ++ // single outer pre-loop load ++ const int firstStep = isLL ? 7 : 3; + -+static address get_target_of_movptr(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16; // Addi. 
-+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. -+ return (address) target_address; -+} ++ const int ASIZE = 256; ++ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) + -+static address get_target_of_li64(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20)); // Addi. -+ return (address)target_address; -+} ++ sub(sp, sp, ASIZE); + -+static address get_target_of_li32(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addiw. -+ return (address)target_address; -+} ++ // init BC offset table with default value: needle_len ++ slli(t0, needle_len, 8); ++ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] ++ slli(tmp1, t0, 16); ++ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] ++ slli(tmp1, t0, 32); ++ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] + -+// Patch any kind of instruction; there may be several instructions. -+// Return the total length (in bytes) of the instructions. 
-+int MacroAssembler::pd_patch_instruction_size(address branch, address target) { -+ assert_cond(branch != NULL); -+ int64_t offset = target - branch; -+ if (NativeInstruction::is_jal_at(branch)) { // jal -+ return patch_offset_in_jal(branch, offset); -+ } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne -+ return patch_offset_in_conditional_branch(branch, offset); -+ } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load -+ return patch_offset_in_pc_relative(branch, offset); -+ } else if (NativeInstruction::is_movptr_at(branch)) { // movptr -+ return patch_addr_in_movptr(branch, target); -+ } else if (NativeInstruction::is_li64_at(branch)) { // li64 -+ return patch_imm_in_li64(branch, target); -+ } else if (NativeInstruction::is_li32_at(branch)) { // li32 -+ int64_t imm = (intptr_t)target; -+ return patch_imm_in_li32(branch, (int32_t)imm); -+ } else { -+#ifdef ASSERT -+ tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", -+ *(unsigned*)branch, p2i(branch)); -+ Disassembler::decode(branch - 16, branch + 16); -+#endif -+ ShouldNotReachHere(); -+ return -1; ++ mv(ch1, sp); // ch1 is t0 ++ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations ++ ++ bind(BM_INIT_LOOP); ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ for (int i = 0; i < 4; i++) { ++ sd(tmp5, Address(ch1, i * wordSize)); + } -+} ++ add(ch1, ch1, 32); ++ sub(tmp6, tmp6, 4); ++ bgtz(tmp6, BM_INIT_LOOP); + -+address MacroAssembler::target_addr_for_insn(address insn_addr) { -+ long offset = 0; -+ assert_cond(insn_addr != NULL); -+ if (NativeInstruction::is_jal_at(insn_addr)) { // jal -+ offset = get_offset_of_jal(insn_addr); -+ } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne -+ offset = get_offset_of_conditional_branch(insn_addr); -+ } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load -+ offset = get_offset_of_pc_relative(insn_addr); -+ } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr -+ return get_target_of_movptr(insn_addr); -+ } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64 -+ return get_target_of_li64(insn_addr); -+ } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32 -+ return get_target_of_li32(insn_addr); ++ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern ++ Register orig_haystack = tmp5; ++ mv(orig_haystack, haystack); ++ // result_tmp = tmp4 ++ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); ++ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 ++ mv(tmp3, needle); ++ ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ bind(BCLOOP); ++ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); ++ add(tmp3, tmp3, needle_chr_size); ++ if (!needle_isL) { ++ // ae == StrIntrinsicNode::UU ++ mv(tmp6, ASIZE); ++ bgeu(ch1, tmp6, BCSKIP); ++ } ++ add(tmp4, sp, ch1); ++ sb(ch2, Address(tmp4)); // store skip offset to BC offset table ++ ++ bind(BCSKIP); ++ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 ++ bgtz(ch2, BCLOOP); ++ ++ // tmp6: pattern end, address after needle ++ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); ++ if (needle_isL == haystack_isL) { ++ // load last 8 bytes (8LL/4UU symbols) ++ ld(tmp6, 
Address(tmp6, -wordSize)); + } else { -+ ShouldNotReachHere(); ++ // UL: from UTF-16(source) search Latin1(pattern) ++ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) ++ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d ++ // We'll have to wait until load completed, but it's still faster than per-character loads+checks ++ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a ++ slli(ch2, tmp6, XLEN - 24); ++ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b ++ slli(ch1, tmp6, XLEN - 16); ++ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c ++ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d ++ slli(ch2, ch2, 16); ++ orr(ch2, ch2, ch1); // 0x00000b0c ++ slli(result, tmp3, 48); // use result as temp register ++ orr(tmp6, tmp6, result); // 0x0a00000d ++ slli(result, ch2, 16); ++ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d + } -+ return address(((uintptr_t)insn_addr + offset)); -+} + -+int MacroAssembler::patch_oop(address insn_addr, address o) { -+ // OOPs are either narrow (32 bits) or wide (48 bits). We encode -+ // narrow OOPs by setting the upper 16 bits in the first -+ // instruction. -+ if (NativeInstruction::is_li32_at(insn_addr)) { -+ // Move narrow OOP -+ uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); -+ return patch_imm_in_li32(insn_addr, (int32_t)n); -+ } else if (NativeInstruction::is_movptr_at(insn_addr)) { -+ // Move wide OOP -+ return patch_addr_in_movptr(insn_addr, o); ++ // i = m - 1; ++ // skipch = j + i; ++ // if (skipch == pattern[m - 1] ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // else ++ // move j with bad char offset table ++ bind(BMLOOPSTR2); ++ // compare pattern to source string backward ++ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); ++ (this->*haystack_load_1chr)(skipch, Address(result), noreg); ++ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 ++ if (needle_isL == haystack_isL) { ++ // re-init tmp3. It's for free because it's executed in parallel with ++ // load above. Alternative is to initialize it before loop, but it'll ++ // affect performance on in-order systems with 2 or more ld/st pipelines ++ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] ++ } ++ if (!isLL) { // UU/UL case ++ slli(ch2, nlen_tmp, 1); // offsets in bytes ++ } ++ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char ++ add(result, haystack, isLL ? nlen_tmp : ch2); ++ ld(ch2, Address(result)); // load 8 bytes from source string ++ mv(ch1, tmp6); ++ if (isLL) { ++ j(BMLOOPSTR1_AFTER_LOAD); ++ } else { ++ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. 
cnt1 >= 8 ++ j(BMLOOPSTR1_CMP); + } -+ ShouldNotReachHere(); -+ return -1; -+} + -+void MacroAssembler::reinit_heapbase() { -+ if (UseCompressedOops) { -+ if (Universe::is_fully_initialized()) { -+ mv(xheapbase, CompressedOops::ptrs_base()); ++ bind(BMLOOPSTR1); ++ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ ++ bind(BMLOOPSTR1_AFTER_LOAD); ++ sub(nlen_tmp, nlen_tmp, 1); ++ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); ++ ++ bind(BMLOOPSTR1_CMP); ++ beq(ch1, ch2, BMLOOPSTR1); ++ ++ bind(BMSKIP); ++ if (!isLL) { ++ // if we've met UTF symbol while searching Latin1 pattern, then we can ++ // skip needle_len symbols ++ if (needle_isL != haystack_isL) { ++ mv(result_tmp, needle_len); + } else { -+ int32_t offset = 0; -+ la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset); -+ ld(xheapbase, Address(xheapbase, offset)); ++ mv(result_tmp, 1); + } ++ mv(t0, ASIZE); ++ bgeu(skipch, t0, BMADV); + } -+} ++ add(result_tmp, sp, skipch); ++ lbu(result_tmp, Address(result_tmp)); // load skip offset + -+void MacroAssembler::mv(Register Rd, Address dest) { -+ assert(dest.getMode() == Address::literal, "Address mode should be Address::literal"); -+ code_section()->relocate(pc(), dest.rspec()); -+ movptr(Rd, dest.target()); -+} ++ bind(BMADV); ++ sub(nlen_tmp, needle_len, 1); ++ // move haystack after bad char skip offset ++ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); ++ ble(haystack, haystack_end, BMLOOPSTR2); ++ add(sp, sp, ASIZE); ++ j(NOMATCH); + -+void MacroAssembler::mv(Register Rd, address addr) { -+ // Here in case of use with relocation, use fix length instruciton -+ // movptr instead of li -+ movptr(Rd, addr); -+} ++ bind(BMLOOPSTR1_LASTCMP); ++ bne(ch1, ch2, BMSKIP); + -+void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { -+ if (src.is_register()) { -+ mv(Rd, src.as_register()); ++ bind(BMMATCH); ++ sub(result, haystack, orig_haystack); ++ if (!haystack_isL) { ++ srli(result, result, 1); ++ } ++ add(sp, sp, ASIZE); ++ j(DONE); ++ ++ bind(LINEARSTUB); ++ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm ++ bltz(t0, LINEARSEARCH); ++ mv(result, zr); ++ RuntimeAddress stub = NULL; ++ if (isLL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); ++ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); ++ } else if (needle_isL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); ++ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); + } else { -+ mv(Rd, src.as_constant()); ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); ++ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); + } -+} ++ trampoline_call(stub); ++ j(DONE); + -+void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { -+ andr(Rd, Rs1, Rs2); -+ // addw: The result is clipped to 32 bits, then the sign bit is extended, -+ // and the result is stored in Rd -+ addw(Rd, Rd, zr); -+} ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); + -+void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { -+ orr(Rd, Rs1, Rs2); -+ // addw: The result is clipped to 32 bits, then the sign bit is extended, -+ // and the result is stored in Rd -+ addw(Rd, Rd, zr); -+} ++ 
bind(LINEARSEARCH); ++ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); + -+void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { -+ xorr(Rd, Rs1, Rs2); -+ // addw: The result is clipped to 32 bits, then the sign bit is extended, -+ // and the result is stored in Rd -+ addw(Rd, Rd, zr); ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof"); +} + -+// Note: load_unsigned_short used to be called load_unsigned_word. -+int MacroAssembler::load_unsigned_short(Register dst, Address src) { -+ int off = offset(); -+ lhu(dst, src); -+ return off; -+} ++// string_indexof ++// result: x10 ++// src: x11 ++// src_count: x12 ++// pattern: x13 ++// pattern_count: x14 or 1/2/3/4 ++void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae) ++{ ++ // Note: ++ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant ++ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 ++ assert(needle_con_cnt <= 4, "Invalid needle constant count"); ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + -+int MacroAssembler::load_unsigned_byte(Register dst, Address src) { -+ int off = offset(); -+ lbu(dst, src); -+ return off; -+} ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register hlen_neg = haystack_len, nlen_neg = needle_len; ++ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; + -+int MacroAssembler::load_signed_short(Register dst, Address src) { -+ int off = offset(); -+ lh(dst, src); -+ return off; -+} ++ bool isLL = ae == StrIntrinsicNode::LL; + -+int MacroAssembler::load_signed_byte(Register dst, Address src) { -+ int off = offset(); -+ lb(dst, src); -+ return off; -+} ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; + -+void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { -+ switch (size_in_bytes) { -+ case 8: ld(dst, src); break; -+ case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; -+ case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; -+ case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; -+ default: ShouldNotReachHere(); -+ } -+} ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; ++ load_chr_insn load_4chr = isLL ? 
(load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; + -+void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { -+ switch (size_in_bytes) { -+ case 8: sd(src, dst); break; -+ case 4: sw(src, dst); break; -+ case 2: sh(src, dst); break; -+ case 1: sb(src, dst); break; -+ default: ShouldNotReachHere(); -+ } -+} ++ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; + -+// reverse bytes in halfword in lower 16 bits and sign-extend -+// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) -+void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { -+ if (UseRVB) { -+ rev8(Rd, Rs); -+ srai(Rd, Rd, 48); -+ return; -+ } -+ assert_different_registers(Rs, tmp); -+ assert_different_registers(Rd, tmp); -+ srli(tmp, Rs, 8); -+ andi(tmp, tmp, 0xFF); -+ slli(Rd, Rs, 56); -+ srai(Rd, Rd, 48); // sign-extend -+ orr(Rd, Rd, tmp); -+} ++ Register first = tmp3; + -+// reverse bytes in lower word and sign-extend -+// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) -+void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseRVB) { -+ rev8(Rd, Rs); -+ srai(Rd, Rd, 32); -+ return; -+ } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ revb_h_w_u(Rd, Rs, tmp1, tmp2); -+ slli(tmp2, Rd, 48); -+ srai(tmp2, tmp2, 32); // sign-extend -+ srli(Rd, Rd, 16); -+ orr(Rd, Rd, tmp2); -+} ++ if (needle_con_cnt == -1) { ++ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; + -+// reverse bytes in halfword in lower 16 bits and zero-extend -+// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) -+void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { -+ if (UseRVB) { -+ rev8(Rd, Rs); -+ srli(Rd, Rd, 48); -+ return; -+ } -+ assert_different_registers(Rs, tmp); -+ assert_different_registers(Rd, tmp); -+ srli(tmp, Rs, 8); -+ andi(tmp, tmp, 0xFF); -+ andi(Rd, Rs, 0xFF); -+ slli(Rd, Rd, 8); -+ orr(Rd, Rd, tmp); -+} ++ sub(t0, needle_len, needle_isL == haystack_isL ? 
4 : 2); ++ bltz(t0, DOSHORT); + -+// reverse bytes in halfwords in lower 32 bits and zero-extend -+// Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) -+void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseRVB) { -+ rev8(Rd, Rs); -+ rori(Rd, Rd, 32); -+ roriw(Rd, Rd, 16); -+ zext_w(Rd, Rd); -+ return; -+ } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ srli(tmp2, Rs, 16); -+ revb_h_h_u(tmp2, tmp2, tmp1); -+ revb_h_h_u(Rd, Rs, tmp1); -+ slli(tmp2, tmp2, 16); -+ orr(Rd, Rd, tmp2); -+} ++ (this->*needle_load_1chr)(first, Address(needle), noreg); ++ slli(t0, needle_len, needle_chr_shift); ++ add(needle, needle, t0); ++ neg(nlen_neg, t0); ++ slli(t0, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, t0); ++ neg(hlen_neg, t0); + -+// This method is only used for revb_h -+// Rd = Rs[47:0] Rs[55:48] Rs[63:56] -+void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1); -+ srli(tmp1, Rs, 48); -+ andi(tmp2, tmp1, 0xFF); -+ slli(tmp2, tmp2, 8); -+ srli(tmp1, tmp1, 8); -+ orr(tmp1, tmp1, tmp2); -+ slli(Rd, Rs, 16); -+ orr(Rd, Rd, tmp1); -+} ++ bind(FIRST_LOOP); ++ add(t0, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); ++ beq(first, ch2, STR1_LOOP); + -+// reverse bytes in each halfword -+// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] -+void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseRVB) { -+ assert_different_registers(Rs, tmp1); -+ assert_different_registers(Rd, tmp1); -+ rev8(Rd, Rs); -+ zext_w(tmp1, Rd); -+ roriw(tmp1, tmp1, 16); -+ slli(tmp1, tmp1, 32); -+ srli(Rd, Rd, 32); -+ roriw(Rd, Rd, 16); -+ zext_w(Rd, Rd); -+ orr(Rd, Rd, tmp1); -+ return; -+ } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ revb_h_helper(Rd, Rs, tmp1, tmp2); -+ for (int i = 0; i < 3; ++i) { -+ revb_h_helper(Rd, Rd, tmp1, tmp2); -+ } -+} ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); + -+// reverse bytes in each word -+// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] -+void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseRVB) { -+ rev8(Rd, Rs); -+ rori(Rd, Rd, 32); -+ return; -+ } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ revb(Rd, Rs, tmp1, tmp2); -+ ror_imm(Rd, Rd, 32); -+} ++ bind(STR1_LOOP); ++ add(nlen_tmp, nlen_neg, needle_chr_size); ++ add(hlen_tmp, hlen_neg, haystack_chr_size); ++ bgez(nlen_tmp, MATCH); + -+// reverse bytes in doubleword -+// Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] -+void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseRVB) { -+ rev8(Rd, Rs); -+ return; -+ } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ andi(tmp1, Rs, 0xFF); -+ slli(tmp1, tmp1, 8); -+ for (int step = 8; step < 56; step += 8) { -+ srli(tmp2, Rs, step); -+ andi(tmp2, tmp2, 0xFF); -+ orr(tmp1, tmp1, tmp2); -+ slli(tmp1, tmp1, 8); -+ } -+ srli(Rd, Rs, 56); -+ andi(Rd, Rd, 0xFF); -+ orr(Rd, tmp1, Rd); -+} ++ bind(STR1_NEXT); ++ add(ch1, needle, nlen_tmp); ++ 
(this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ add(nlen_tmp, nlen_tmp, needle_chr_size); ++ add(hlen_tmp, hlen_tmp, haystack_chr_size); ++ bltz(nlen_tmp, STR1_NEXT); ++ j(MATCH); + -+// rotate right with shift bits -+void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) -+{ -+ if (UseRVB) { -+ rori(dst, src, shift); -+ return; ++ bind(DOSHORT); ++ if (needle_isL == haystack_isL) { ++ sub(t0, needle_len, 2); ++ bltz(t0, DO1); ++ bgtz(t0, DO3); ++ } + } + -+ assert_different_registers(dst, tmp); -+ assert_different_registers(src, tmp); -+ assert(shift < 64, "shift amount must be < 64"); -+ slli(tmp, src, 64 - shift); -+ srli(dst, src, shift); -+ orr(dst, dst, tmp); -+} ++ if (needle_con_cnt == 4) { ++ Label CH1_LOOP; ++ (this->*load_4chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 4); ++ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { -+ if (is_imm_in_range(imm, 12, 0)) { -+ and_imm12(Rd, Rn, imm); -+ } else { -+ assert_different_registers(Rn, tmp); -+ li(tmp, imm); -+ andr(Rd, Rn, tmp); ++ bind(CH1_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_4chr)(ch2, Address(ch2), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); + } -+} + -+void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { -+ ld(tmp1, adr); -+ if (src.is_register()) { -+ orr(tmp1, tmp1, src.as_register()); -+ } else { -+ if (is_imm_in_range(src.as_constant(), 12, 0)) { -+ ori(tmp1, tmp1, src.as_constant()); -+ } else { -+ assert_different_registers(tmp1, tmp2); -+ li(tmp2, src.as_constant()); -+ orr(tmp1, tmp1, tmp2); ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { ++ Label CH1_LOOP; ++ BLOCK_COMMENT("string_indexof DO2 {"); ++ bind(DO2); ++ (this->*load_2chr)(ch1, Address(needle), noreg); ++ if (needle_con_cnt == 2) { ++ sub(result_tmp, haystack_len, 2); + } -+ } -+ sd(tmp1, adr); -+} ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { -+ if (UseCompressedClassPointers) { -+ lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); -+ if (CompressedKlassPointers::base() == NULL) { -+ slli(tmp, tmp, CompressedKlassPointers::shift()); -+ beq(trial_klass, tmp, L); -+ return; -+ } -+ decode_klass_not_null(tmp); -+ } else { -+ ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); ++ bind(CH1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ BLOCK_COMMENT("} string_indexof DO2"); + } -+ beq(trial_klass, tmp, L); -+} + -+// Move an oop into a register. immediate is true if we want -+// immediate instructions and nmethod entry barriers are not enabled. -+// i.e. we are not going to patch this instruction while the code is being -+// executed by another thread. 
-+void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { -+ int oop_index; -+ if (obj == NULL) { -+ oop_index = oop_recorder()->allocate_oop_index(obj); -+ } else { -+#ifdef ASSERT -+ { -+ ThreadInVMfromUnknown tiv; -+ assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); -+ } -+#endif -+ oop_index = oop_recorder()->find_index(obj); -+ } -+ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { ++ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; ++ BLOCK_COMMENT("string_indexof DO3 {"); + -+ // nmethod entry barrier necessitate using the constant pool. They have to be -+ // ordered with respected to oop access. -+ // Using immediate literals would necessitate fence.i. -+ if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) { -+ address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address -+ ld_constant(dst, Address(dummy, rspec)); -+ } else -+ mv(dst, Address((address)obj, rspec)); -+} ++ bind(DO3); ++ (this->*load_2chr)(first, Address(needle), noreg); ++ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); ++ if (needle_con_cnt == 3) { ++ sub(result_tmp, haystack_len, 3); ++ } ++ slli(hlen_tmp, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, hlen_tmp); ++ neg(hlen_neg, hlen_tmp); + -+// Move a metadata address into a register. -+void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { -+ int oop_index; -+ if (obj == NULL) { -+ oop_index = oop_recorder()->allocate_metadata_index(obj); -+ } else { -+ oop_index = oop_recorder()->find_index(obj); -+ } -+ RelocationHolder rspec = metadata_Relocation::spec(oop_index); -+ mv(dst, Address((address)obj, rspec)); -+} ++ bind(FIRST_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(ch2), noreg); ++ beq(first, ch2, STR1_LOOP); + -+// Writes to stack successive pages until offset reached to check for -+// stack overflow + shadow pages. This clobbers tmp. -+void MacroAssembler::bang_stack_size(Register size, Register tmp) { -+ assert_different_registers(tmp, size, t0); -+ // Bang stack for total size given plus shadow page size. -+ // Bang one page at a time because large size can bang beyond yellow and -+ // red zones. -+ mv(t0, os::vm_page_size()); -+ Label loop; -+ bind(loop); -+ sub(tmp, sp, t0); -+ subw(size, size, t0); -+ sd(size, Address(tmp)); -+ bgtz(size, loop); ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); + -+ // Bang down shadow pages too. -+ // At this point, (tmp-0) is the last address touched, so don't -+ // touch it again. (It was touched as (tmp-pagesize) but then tmp -+ // was post-decremented.) Skip this address by starting at i=1, and -+ // touch a few more pages below. N.B. It is important to touch all -+ // the way down to and including i=StackShadowPages. -+ for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { -+ // this could be any sized move but this is can be a debugging crumb -+ // so the bigger the better. 
-+ sub(tmp, tmp, os::vm_page_size()); -+ sd(size, Address(tmp, 0)); ++ bind(STR1_LOOP); ++ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ j(MATCH); ++ BLOCK_COMMENT("} string_indexof DO3"); + } -+} -+ -+SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { -+ assert_cond(masm != NULL); -+ int32_t offset = 0; -+ _masm = masm; -+ _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset); -+ _masm->lbu(t0, Address(t0, offset)); -+ _masm->beqz(t0, _label); -+} -+ -+SkipIfEqual::~SkipIfEqual() { -+ assert_cond(_masm != NULL); -+ _masm->bind(_label); -+ _masm = NULL; -+} -+ -+void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) { -+ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); -+ ld(dst, Address(xmethod, Method::const_offset())); -+ ld(dst, Address(dst, ConstMethod::constants_offset())); -+ ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes())); -+ ld(dst, Address(dst, mirror_offset)); -+ resolve_oop_handle(dst, tmp); -+} -+ -+void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { -+ // OopHandle::resolve is an indirection. -+ assert_different_registers(result, tmp); -+ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); -+} -+ -+// ((WeakHandle)result).resolve() -+void MacroAssembler::resolve_weak_handle(Register result, Register tmp) { -+ assert_different_registers(result, tmp); -+ Label resolved; + -+ // A null weak handle resolves to null. -+ beqz(result, resolved); ++ if (needle_con_cnt == -1 || needle_con_cnt == 1) { ++ Label DO1_LOOP; + -+ // Only 64 bit platforms support GCs that require a tmp register -+ // Only IN_HEAP loads require a thread_tmp register -+ // WeakHandle::resolve is an indirection like jweak. 
-+ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, -+ result, Address(result), tmp, noreg /* tmp_thread */); -+ bind(resolved); -+} ++ BLOCK_COMMENT("string_indexof DO1 {"); ++ bind(DO1); ++ (this->*needle_load_1chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 1); ++ mv(tmp3, result_tmp); ++ if (haystack_chr_shift) { ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ } ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, -+ Register dst, Address src, -+ Register tmp1, Register thread_tmp) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ decorators = AccessInternal::decorator_fixup(decorators); -+ bool as_raw = (decorators & AS_RAW) != 0; -+ if (as_raw) { -+ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); -+ } else { -+ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ bind(DO1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, DO1_LOOP); ++ BLOCK_COMMENT("} string_indexof DO1"); + } -+} + -+void MacroAssembler::null_check(Register reg, int offset) { -+ if (needs_explicit_null_check(offset)) { -+ // provoke OS NULL exception if reg = NULL by -+ // accessing M[reg] w/o changing any registers -+ // NOTE: this is plenty to provoke a segv -+ ld(zr, Address(reg, 0)); -+ } else { -+ // nothing to do, (later) access of M[reg + offset] -+ // will provoke OS NULL exception if reg = NULL -+ } -+} ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); + -+void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, -+ Address dst, Register src, -+ Register tmp1, Register thread_tmp) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ decorators = AccessInternal::decorator_fixup(decorators); -+ bool as_raw = (decorators & AS_RAW) != 0; -+ if (as_raw) { -+ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp); -+ } else { -+ bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp); -+ } -+} ++ bind(MATCH); ++ srai(t0, hlen_neg, haystack_chr_shift); ++ add(result, result_tmp, t0); + -+// Algorithm must match CompressedOops::encode. -+void MacroAssembler::encode_heap_oop(Register d, Register s) { -+ verify_oop(s, "broken oop in encode_heap_oop"); -+ if (CompressedOops::base() == NULL) { -+ if (CompressedOops::shift() != 0) { -+ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); -+ srli(d, s, LogMinObjAlignmentInBytes); -+ } else { -+ mv(d, s); -+ } -+ } else { -+ Label notNull; -+ sub(d, s, xheapbase); -+ bgez(d, notNull); -+ mv(d, zr); -+ bind(notNull); -+ if (CompressedOops::shift() != 0) { -+ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); -+ srli(d, d, CompressedOops::shift()); -+ } -+ } ++ bind(DONE); +} + -+void MacroAssembler::load_klass(Register dst, Register src) { -+ if (UseCompressedClassPointers) { -+ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); -+ decode_klass_not_null(dst); -+ } else { -+ ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); -+ } -+} ++// Compare strings. 
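string_indexof_linearscan above keeps both scan positions as negative offsets that are counted up toward zero, so the loop-termination tests are plain sign checks (blez/bgez) rather than extra compares. A plain C++ reference of the scan it performs, for illustration only (indexof_reference is not part of the generated code):

// Reference semantics of the linear scan: j walks the haystack, k the needle;
// in the assembly both are kept as negative offsets relative to the end of
// their valid ranges so "still in range" is simply "offset <= 0".
static int indexof_reference(const char* hay, int n, const char* nee, int m) {
  for (int j = 0; j + m <= n; j++) {
    int k = 0;
    while (k < m && hay[j + k] == nee[k]) {
      k++;
    }
    if (k == m) {
      return j;          // MATCH: index of the first occurrence
    }
  }
  return -1;             // NOMATCH
}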
++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, ++ Register tmp3, int ae) ++{ ++ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, ++ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, ++ SHORT_LOOP_START, TAIL_CHECK, L; + -+void MacroAssembler::store_klass(Register dst, Register src) { -+ // FIXME: Should this be a store release? concurrent gcs assumes -+ // klass length is valid if klass field is not null. -+ if (UseCompressedClassPointers) { -+ encode_klass_not_null(src); -+ sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); -+ } else { -+ sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); -+ } -+} ++ const int STUB_THRESHOLD = 64 + 8; ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; + -+void MacroAssembler::store_klass_gap(Register dst, Register src) { -+ if (UseCompressedClassPointers) { -+ // Store to klass gap in destination -+ sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); -+ } -+} ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; + -+void MacroAssembler::decode_klass_not_null(Register r) { -+ decode_klass_not_null(r, r); -+} ++ // for L strings, 1 byte for 1 character ++ // for U strings, 2 bytes for 1 character ++ int str1_chr_size = str1_isL ? 1 : 2; ++ int str2_chr_size = str2_isL ? 1 : 2; ++ int minCharsInWord = isLL ? wordSize : wordSize / 2; + -+void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { -+ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; + -+ if (CompressedKlassPointers::base() == NULL) { -+ if (CompressedKlassPointers::shift() != 0) { -+ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ slli(dst, src, LogKlassAlignmentInBytes); -+ } else { -+ mv(dst, src); -+ } -+ return; -+ } ++ BLOCK_COMMENT("string_compare {"); + -+ Register xbase = dst; -+ if (dst == src) { -+ xbase = tmp; ++ // Bizzarely, the counts are passed in bytes, regardless of whether they ++ // are L or U strings, however the result is always in characters. ++ if (!str1_isL) { ++ sraiw(cnt1, cnt1, 1); + } -+ -+ assert_different_registers(src, xbase); -+ li(xbase, (uintptr_t)CompressedKlassPointers::base()); -+ -+ if (CompressedKlassPointers::shift() != 0) { -+ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ assert_different_registers(t0, xbase); -+ shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); -+ } else { -+ add(dst, xbase, src); ++ if (!str2_isL) { ++ sraiw(cnt2, cnt2, 1); + } + -+ if (xbase == xheapbase) { reinit_heapbase(); } -+} -+ -+void MacroAssembler::encode_klass_not_null(Register r) { -+ encode_klass_not_null(r, r); -+} -+ -+void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { -+ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ // Compute the minimum of the string lengths and save the difference in result. 
++ sub(result, cnt1, cnt2); ++ bgt(cnt1, cnt2, L); ++ mv(cnt2, cnt1); ++ bind(L); + -+ if (CompressedKlassPointers::base() == NULL) { -+ if (CompressedKlassPointers::shift() != 0) { -+ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ srli(dst, src, LogKlassAlignmentInBytes); -+ } else { -+ mv(dst, src); -+ } -+ return; -+ } ++ // A very short string ++ li(t0, minCharsInWord); ++ ble(cnt2, t0, SHORT_STRING); + -+ if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && -+ CompressedKlassPointers::shift() == 0) { -+ zero_extend(dst, src, 32); -+ return; -+ } -+ -+ Register xbase = dst; -+ if (dst == src) { -+ xbase = tmp; -+ } ++ // Compare longwords ++ // load first parts of strings and finish initialization while loading ++ { ++ if (str1_isL == str2_isL) { // LL or UU ++ // load 8 bytes once to compare ++ ld(tmp1, Address(str1)); ++ beq(str1, str2, DONE); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ sub(cnt2, cnt2, minCharsInWord); ++ beqz(cnt2, TAIL_CHECK); ++ // convert cnt2 from characters to bytes ++ if (!str1_isL) { ++ slli(cnt2, cnt2, 1); ++ } ++ add(str2, str2, cnt2); ++ add(str1, str1, cnt2); ++ sub(cnt2, zr, cnt2); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ add(str1, str1, cnt2); ++ sub(cnt1, zr, cnt2); ++ slli(cnt2, cnt2, 1); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 4); ++ } else { // UL case ++ ld(tmp1, Address(str1)); ++ lwu(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ slli(t0, cnt2, 1); ++ sub(cnt1, zr, t0); ++ add(str1, str1, t0); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 8); ++ } ++ addi(cnt2, cnt2, isUL ? 4 : 8); ++ bgez(cnt2, TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); + -+ assert_different_registers(src, xbase); -+ li(xbase, (intptr_t)CompressedKlassPointers::base()); -+ sub(dst, src, xbase); -+ if (CompressedKlassPointers::shift() != 0) { -+ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ srli(dst, dst, LogKlassAlignmentInBytes); -+ } -+ if (xbase == xheapbase) { -+ reinit_heapbase(); -+ } -+} ++ // main loop ++ bind(NEXT_WORD); ++ if (str1_isL == str2_isL) { // LL or UU ++ add(t0, str1, cnt2); ++ ld(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt2, cnt2, 8); ++ } else if (isLU) { // LU case ++ add(t0, str1, cnt1); ++ lwu(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt1, cnt1, 4); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ addi(cnt2, cnt2, 8); ++ } else { // UL case ++ add(t0, str2, cnt2); ++ lwu(tmp2, Address(t0)); ++ add(t0, str1, cnt1); ++ ld(tmp1, Address(t0)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ addi(cnt1, cnt1, 8); ++ addi(cnt2, cnt2, 4); ++ } ++ bgez(cnt2, TAIL); + -+void MacroAssembler::decode_heap_oop_not_null(Register r) { -+ decode_heap_oop_not_null(r, r); -+} ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, NEXT_WORD); ++ j(DIFFERENCE); ++ bind(TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. 
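In the LU/UL paths above, the narrower (Latin-1) side is widened with inflate_lo32 before each word compare. Assuming inflate_lo32 zero-extends each of the four low-order bytes into a 16-bit code unit, its effect is roughly the following sketch (inflate_lo32_ref is illustrative only):

#include <cstdint>

// Widen four Latin-1 bytes into four UTF-16 code units (little-endian order),
// so an 8-byte word of the UTF-16 string can be compared in one operation.
// Assumed behaviour of inflate_lo32; e.g. 0x44332211 -> 0x0044003300220011.
static inline uint64_t inflate_lo32_ref(uint32_t latin1) {
  uint64_t w = 0;
  for (int i = 0; i < 4; i++) {
    w |= (uint64_t)((latin1 >> (8 * i)) & 0xff) << (16 * i);
  }
  return w;
}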
++ if (str1_isL == str2_isL) { // LL or UU ++ ld(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ } else { // UL case ++ lwu(tmp2, Address(str2)); ++ ld(tmp1, Address(str1)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ } ++ bind(TAIL_CHECK); ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, DONE); + -+void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { -+ assert(UseCompressedOops, "should only be used for compressed headers"); -+ assert(Universe::heap() != NULL, "java heap should be initialized"); -+ // Cannot assert, unverified entry point counts instructions (see .ad file) -+ // vtableStubs also counts instructions in pd_code_size_limit. -+ // Also do not verify_oop as this is called by verify_oop. -+ if (CompressedOops::shift() != 0) { -+ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); -+ slli(dst, src, LogMinObjAlignmentInBytes); -+ if (CompressedOops::base() != NULL) { -+ add(dst, xheapbase, dst); ++ // Find the first different characters in the longwords and ++ // compute their difference. ++ bind(DIFFERENCE); ++ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb ++ srl(tmp1, tmp1, result); ++ srl(tmp2, tmp2, result); ++ if (isLL) { ++ andi(tmp1, tmp1, 0xFF); ++ andi(tmp2, tmp2, 0xFF); ++ } else { ++ andi(tmp1, tmp1, 0xFFFF); ++ andi(tmp2, tmp2, 0xFFFF); + } -+ } else { -+ assert(CompressedOops::base() == NULL, "sanity"); -+ mv(dst, src); ++ sub(result, tmp1, tmp2); ++ j(DONE); + } -+} + -+void MacroAssembler::decode_heap_oop(Register d, Register s) { -+ if (CompressedOops::base() == NULL) { -+ if (CompressedOops::shift() != 0 || d != s) { -+ slli(d, s, CompressedOops::shift()); -+ } -+ } else { -+ Label done; -+ mv(d, s); -+ beqz(s, done); -+ shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); -+ bind(done); ++ bind(STUB); ++ RuntimeAddress stub = NULL; ++ switch (ae) { ++ case StrIntrinsicNode::LL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); ++ break; ++ case StrIntrinsicNode::UU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); ++ break; ++ case StrIntrinsicNode::LU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); ++ break; ++ case StrIntrinsicNode::UL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); ++ break; ++ default: ++ ShouldNotReachHere(); + } -+ verify_oop(d, "broken oop in decode_heap_oop"); -+} ++ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); ++ trampoline_call(stub); ++ j(DONE); + -+void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, -+ Register thread_tmp, DecoratorSet decorators) { -+ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); -+} ++ bind(SHORT_STRING); ++ // Is the minimum length zero? 
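The DIFFERENCE block above locates the first mismatching character from the XOR of the two words. Assuming ctzc_bit yields the trailing-zero count rounded down to a character boundary, the scalar equivalent is roughly this sketch (first_diff is illustrative only):

#include <cstdint>

// Find the first differing character in two equal-width words and return
// their signed difference (same convention as 'result' above).
static int first_diff(uint64_t a, uint64_t b, bool latin1) {
  uint64_t x = a ^ b;                          // non-zero on entry
  int bit = __builtin_ctzll(x);                // lowest differing bit
  bit &= latin1 ? ~7 : ~15;                    // round down to an 8/16-bit slot
  int mask = latin1 ? 0xFF : 0xFFFF;
  return (int)((a >> bit) & mask) - (int)((b >> bit) & mask);
}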
++ beqz(cnt2, DONE); ++ // arrange code to do most branches while loading and loading next characters ++ // while comparing previous ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ j(SHORT_LOOP_START); ++ bind(SHORT_LOOP); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST); ++ bind(SHORT_LOOP_START); ++ (this->*str1_load_chr)(tmp2, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(t0, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bne(tmp1, cnt1, SHORT_LOOP_TAIL); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST2); ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ beq(tmp2, t0, SHORT_LOOP); ++ sub(result, tmp2, t0); ++ j(DONE); ++ bind(SHORT_LOOP_TAIL); ++ sub(result, tmp1, cnt1); ++ j(DONE); ++ bind(SHORT_LAST2); ++ beq(tmp2, t0, DONE); ++ sub(result, tmp2, t0); + -+void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, -+ Register thread_tmp, DecoratorSet decorators) { -+ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); -+} ++ j(DONE); ++ bind(SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bind(SHORT_LAST); ++ beq(tmp1, cnt1, DONE); ++ sub(result, tmp1, cnt1); + -+void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, -+ Register thread_tmp, DecoratorSet decorators) { -+ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, thread_tmp); -+} ++ bind(DONE); + -+// Used for storing NULLs. -+void MacroAssembler::store_heap_oop_null(Address dst) { -+ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++ BLOCK_COMMENT("} string_compare"); +} + -+int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, -+ bool want_remainder) -+{ -+ // Full implementation of Java idiv and irem. The function -+ // returns the (pc) offset of the div instruction - may be needed -+ // for implicit exceptions. -+ // -+ // input : rs1: dividend -+ // rs2: divisor -+ // -+ // result: either -+ // quotient (= rs1 idiv rs2) -+ // remainder (= rs1 irem rs2) -+ ++void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, ++ Register tmp4, Register tmp5, Register tmp6, Register result, ++ Register cnt1, int elem_size) { ++ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ Register cnt2 = tmp2; // cnt2 only used in array length compare ++ Register elem_per_word = tmp6; ++ int log_elem_size = exact_log2(elem_size); ++ int length_offset = arrayOopDesc::length_offset_in_bytes(); ++ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); + -+ int idivl_offset = offset(); -+ if (!want_remainder) { -+ divw(result, rs1, rs2); -+ } else { -+ remw(result, rs1, rs2); // result = rs1 % rs2; -+ } -+ return idivl_offset; -+} ++ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); ++ li(elem_per_word, wordSize / elem_size); + -+int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, -+ bool want_remainder) -+{ -+ // Full implementation of Java ldiv and lrem. 
The function -+ // returns the (pc) offset of the div instruction - may be needed -+ // for implicit exceptions. -+ // -+ // input : rs1: dividend -+ // rs2: divisor -+ // -+ // result: either -+ // quotient (= rs1 idiv rs2) -+ // remainder (= rs1 irem rs2) ++ BLOCK_COMMENT("arrays_equals {"); + -+ int idivq_offset = offset(); -+ if (!want_remainder) { -+ div(result, rs1, rs2); -+ } else { -+ rem(result, rs1, rs2); // result = rs1 % rs2; -+ } -+ return idivq_offset; -+} ++ // if (a1 == a2), return true ++ beq(a1, a2, SAME); + -+// Look up the method for a megamorpic invkkeinterface call. -+// The target method is determined by . -+// The receiver klass is in recv_klass. -+// On success, the result will be in method_result, and execution falls through. -+// On failure, execution transfers to the given label. -+void MacroAssembler::lookup_interface_method(Register recv_klass, -+ Register intf_klass, -+ RegisterOrConstant itable_index, -+ Register method_result, -+ Register scan_tmp, -+ Label& L_no_such_interface, -+ bool return_method) { -+ assert_different_registers(recv_klass, intf_klass, scan_tmp); -+ assert_different_registers(method_result, intf_klass, scan_tmp); -+ assert(recv_klass != method_result || !return_method, -+ "recv_klass can be destroyed when mehtid isn't needed"); -+ assert(itable_index.is_constant() || itable_index.as_register() == method_result, -+ "caller must be same register for non-constant itable index as for method"); ++ mv(result, false); ++ beqz(a1, DONE); ++ beqz(a2, DONE); ++ lwu(cnt1, Address(a1, length_offset)); ++ lwu(cnt2, Address(a2, length_offset)); ++ bne(cnt2, cnt1, DONE); ++ beqz(cnt1, SAME); + -+ // Compute start of first itableOffsetEntry (which is at the end of the vtable). -+ int vtable_base = in_bytes(Klass::vtable_start_offset()); -+ int itentry_off = itableMethodEntry::method_offset_in_bytes(); -+ int scan_step = itableOffsetEntry::size() * wordSize; -+ int vte_size = vtableEntry::size_in_bytes(); -+ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ slli(tmp5, cnt1, 3 + log_elem_size); ++ sub(tmp5, zr, tmp5); ++ add(a1, a1, base_offset); ++ add(a2, a2, base_offset); ++ ld(tmp3, Address(a1, 0)); ++ ld(tmp4, Address(a2, 0)); ++ ble(cnt1, elem_per_word, SHORT); // short or same + -+ lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset())); ++ // Main 16 byte comparison loop with 2 exits ++ bind(NEXT_DWORD); { ++ ld(tmp1, Address(a1, wordSize)); ++ ld(tmp2, Address(a2, wordSize)); ++ sub(cnt1, cnt1, 2 * wordSize / elem_size); ++ blez(cnt1, TAIL); ++ bne(tmp3, tmp4, DONE); ++ ld(tmp3, Address(a1, 2 * wordSize)); ++ ld(tmp4, Address(a2, 2 * wordSize)); ++ add(a1, a1, 2 * wordSize); ++ add(a2, a2, 2 * wordSize); ++ ble(cnt1, elem_per_word, TAIL2); ++ } beq(tmp1, tmp2, NEXT_DWORD); ++ j(DONE); + -+ // %%% Could store the aligned, prescaled offset in the klassoop. -+ shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3); -+ add(scan_tmp, scan_tmp, vtable_base); ++ bind(TAIL); ++ xorr(tmp4, tmp3, tmp4); ++ xorr(tmp2, tmp1, tmp2); ++ sll(tmp2, tmp2, tmp5); ++ orr(tmp5, tmp4, tmp2); ++ j(IS_TMP5_ZR); + -+ if (return_method) { -+ // Adjust recv_klass by scaled itable_index, so we can free itable_index. 
-+ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); -+ if (itable_index.is_register()) { -+ slli(t0, itable_index.as_register(), 3); -+ } else { -+ li(t0, itable_index.as_constant() << 3); -+ } -+ add(recv_klass, recv_klass, t0); -+ if (itentry_off) { -+ add(recv_klass, recv_klass, itentry_off); -+ } -+ } ++ bind(TAIL2); ++ bne(tmp1, tmp2, DONE); + -+ Label search, found_method; ++ bind(SHORT); ++ xorr(tmp4, tmp3, tmp4); ++ sll(tmp5, tmp4, tmp5); + -+ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); -+ beq(intf_klass, method_result, found_method); -+ bind(search); -+ // Check that the previous entry is non-null. A null entry means that -+ // the receiver class doens't implement the interface, and wasn't the -+ // same as when the caller was compiled. -+ beqz(method_result, L_no_such_interface, /* is_far */ true); -+ addi(scan_tmp, scan_tmp, scan_step); -+ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); -+ bne(intf_klass, method_result, search); ++ bind(IS_TMP5_ZR); ++ bnez(tmp5, DONE); + -+ bind(found_method); ++ bind(SAME); ++ mv(result, true); ++ // That's it. ++ bind(DONE); + -+ // Got a hit. -+ if (return_method) { -+ lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes())); -+ add(method_result, recv_klass, scan_tmp); -+ ld(method_result, Address(method_result)); -+ } ++ BLOCK_COMMENT("} array_equals"); +} + -+// virtual method calling -+void MacroAssembler::lookup_virtual_method(Register recv_klass, -+ RegisterOrConstant vtable_index, -+ Register method_result) { -+ const int base = in_bytes(Klass::vtable_start_offset()); -+ assert(vtableEntry::size() * wordSize == 8, -+ "adjust the scaling in the code below"); -+ int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes(); ++// Compare Strings + -+ if (vtable_index.is_register()) { -+ shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord); -+ ld(method_result, Address(method_result, vtable_offset_in_bytes)); -+ } else { -+ vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; -+ ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes)); -+ } -+} ++// For Strings we're passed the address of the first characters in a1 ++// and a2 and the length in cnt1. ++// elem_size is the element size in bytes: either 1 or 2. ++// There are two implementations. For arrays >= 8 bytes, all ++// comparisons (including the final one, which may overlap) are ++// performed 8 bytes at a time. For strings < 8 bytes, we compare a ++// halfword, then a short, and then a byte. + -+void MacroAssembler::membar(uint32_t order_constraint) { -+ address prev = pc() - NativeMembar::instruction_size; -+ address last = code()->last_insn(); ++void MacroAssembler::string_equals(Register a1, Register a2, ++ Register result, Register cnt1, int elem_size) ++{ ++ Label SAME, DONE, SHORT, NEXT_WORD; ++ Register tmp1 = t0; ++ Register tmp2 = t1; + -+ if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) { -+ NativeMembar *bar = NativeMembar_at(prev); -+ // We are merging two memory barrier instructions. On RISCV we -+ // can do this simply by ORing them together. 
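The header comment above describes the key trick used by string_equals below (and by the TAIL path of string_compare above): once at least 8 bytes remain, the final word is simply re-read from the end of both buffers, re-comparing a few already-checked bytes instead of computing an exact tail length. A scalar sketch, assuming len >= 8 (bytes_equal_overlapping is illustrative only):

#include <cstdint>
#include <cstring>

// Word-at-a-time equality with an overlapping final load; assumes len >= 8
// (shorter inputs take the byte/halfword tail path instead).
static bool bytes_equal_overlapping(const char* a, const char* b, size_t len) {
  uint64_t wa, wb;
  size_t i = 0;
  for (; i + 8 <= len; i += 8) {           // full 8-byte words
    std::memcpy(&wa, a + i, 8);
    std::memcpy(&wb, b + i, 8);
    if (wa != wb) return false;
  }
  if (i == len) return true;
  std::memcpy(&wa, a + len - 8, 8);        // final word, may overlap bytes
  std::memcpy(&wb, b + len - 8, 8);        // that were already compared
  return wa == wb;
}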
-+ bar->set_kind(bar->get_kind() | order_constraint); -+ BLOCK_COMMENT("merged membar"); -+ } else { -+ code()->set_last_insn(pc()); ++ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1); + -+ uint32_t predecessor = 0; -+ uint32_t successor = 0; ++ BLOCK_COMMENT("string_equals {"); + -+ membar_mask_to_pred_succ(order_constraint, predecessor, successor); -+ fence(predecessor, successor); -+ } -+} ++ mv(result, false); + -+// Form an addres from base + offset in Rd. Rd my or may not -+// actually be used: you must use the Address that is returned. It -+// is up to you to ensure that the shift provided mathces the size -+// of your data. -+Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) { -+ if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12 -+ return Address(base, byte_offset); -+ } ++ // Check for short strings, i.e. smaller than wordSize. ++ sub(cnt1, cnt1, wordSize); ++ bltz(cnt1, SHORT); + -+ // Do it the hard way -+ mv(Rd, byte_offset); -+ add(Rd, base, Rd); -+ return Address(Rd); -+} ++ // Main 8 byte comparison loop. ++ bind(NEXT_WORD); { ++ ld(tmp1, Address(a1, 0)); ++ add(a1, a1, wordSize); ++ ld(tmp2, Address(a2, 0)); ++ add(a2, a2, wordSize); ++ sub(cnt1, cnt1, wordSize); ++ bne(tmp1, tmp2, DONE); ++ } bgtz(cnt1, NEXT_WORD); + -+void MacroAssembler::check_klass_subtype(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label& L_success) { -+ Label L_failure; -+ check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL); -+ check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL); -+ bind(L_failure); -+} ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when ++ // length == 4. ++ add(tmp1, a1, cnt1); ++ ld(tmp1, Address(tmp1, 0)); ++ add(tmp2, a2, cnt1); ++ ld(tmp2, Address(tmp2, 0)); ++ bne(tmp1, tmp2, DONE); ++ j(SAME); + -+void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { -+ ld(t0, Address(xthread, JavaThread::polling_word_offset())); -+ if (acquire) { -+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ } -+ if (at_return) { -+ bgtu(in_nmethod ? sp : fp, t0, slow_path, true /* is_far */); -+ } else { -+ andi(t0, t0, SafepointMechanism::poll_bit()); -+ bnez(t0, slow_path, true /* is_far */); -+ } -+} ++ bind(SHORT); ++ Label TAIL03, TAIL01; + -+void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, -+ Label &succeed, Label *fail) { -+ // oldv holds comparison value -+ // newv holds value to write in exchange -+ // addr identifies memory word to compare against/update -+ Label retry_load, nope; -+ bind(retry_load); -+ // Load reserved from the memory location -+ lr_d(tmp, addr, Assembler::aqrl); -+ // Fail and exit if it is not what we expect -+ bne(tmp, oldv, nope); -+ // If the store conditional succeeds, tmp will be zero -+ sc_d(tmp, newv, addr, Assembler::rl); -+ beqz(tmp, succeed); -+ // Retry only when the store conditional failed -+ j(retry_load); ++ // 0-7 bytes left. 
++ andi(t0, cnt1, 4); ++ beqz(t0, TAIL03); ++ { ++ lwu(tmp1, Address(a1, 0)); ++ add(a1, a1, 4); ++ lwu(tmp2, Address(a2, 0)); ++ add(a2, a2, 4); ++ bne(tmp1, tmp2, DONE); ++ } + -+ bind(nope); -+ membar(AnyAny); -+ mv(oldv, tmp); -+ if (fail != NULL) { -+ j(*fail); ++ bind(TAIL03); ++ // 0-3 bytes left. ++ andi(t0, cnt1, 2); ++ beqz(t0, TAIL01); ++ { ++ lhu(tmp1, Address(a1, 0)); ++ add(a1, a1, 2); ++ lhu(tmp2, Address(a2, 0)); ++ add(a2, a2, 2); ++ bne(tmp1, tmp2, DONE); ++ } ++ ++ bind(TAIL01); ++ if (elem_size == 1) { // Only needed when comparing 1-byte elements ++ // 0-1 bytes left. ++ andi(t0, cnt1, 1); ++ beqz(t0, SAME); ++ { ++ lbu(tmp1, a1, 0); ++ lbu(tmp2, a2, 0); ++ bne(tmp1, tmp2, DONE); ++ } + } ++ ++ // Arrays are equal. ++ bind(SAME); ++ mv(result, true); ++ ++ // That's it. ++ bind(DONE); ++ BLOCK_COMMENT("} string_equals"); +} + -+void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, -+ Label &succeed, Label *fail) { -+ assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); -+ cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); ++typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); ++typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, ++ bool is_far, bool is_unordered); ++ ++static conditional_branch_insn conditional_branches[] = ++{ ++ /* SHORT branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgt, ++ NULL, // BoolTest::overflow ++ (conditional_branch_insn)&Assembler::blt, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::ble, ++ NULL, // BoolTest::no_overflow ++ (conditional_branch_insn)&Assembler::bge, ++ ++ /* UNSIGNED branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgtu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bltu, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::bleu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bgeu ++}; ++ ++static float_conditional_branch_insn float_conditional_branches[] = ++{ ++ /* FLOAT SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::float_beq, ++ (float_conditional_branch_insn)&MacroAssembler::float_bgt, ++ NULL, // BoolTest::overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_blt, ++ (float_conditional_branch_insn)&MacroAssembler::float_bne, ++ (float_conditional_branch_insn)&MacroAssembler::float_ble, ++ NULL, // BoolTest::no_overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_bge, ++ ++ /* DOUBLE SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::double_beq, ++ (float_conditional_branch_insn)&MacroAssembler::double_bgt, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_blt, ++ (float_conditional_branch_insn)&MacroAssembler::double_bne, ++ (float_conditional_branch_insn)&MacroAssembler::double_ble, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_bge ++}; ++ ++void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), ++ "invalid conditional branch index"); ++ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); +} + -+void MacroAssembler::load_reserved(Register addr, -+ enum operand_size size, -+ Assembler::Aqrl acquire) { -+ switch (size) { -+ 
case int64: -+ lr_d(t0, addr, acquire); -+ break; -+ case int32: -+ lr_w(t0, addr, acquire); ++// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use ++// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). ++void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), ++ "invalid float conditional branch index"); ++ int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); ++ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, ++ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); ++} ++ ++void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ case BoolTest::le: ++ beqz(op1, L, is_far); + break; -+ case uint32: -+ lr_w(t0, addr, acquire); -+ zero_extend(t0, t0, 32); ++ case BoolTest::ne: ++ case BoolTest::gt: ++ bnez(op1, L, is_far); + break; + default: + ShouldNotReachHere(); + } +} + -+void MacroAssembler::store_conditional(Register addr, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl release) { -+ switch (size) { -+ case int64: -+ sc_d(t0, new_val, addr, release); ++void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ beqz(op1, L, is_far); + break; -+ case int32: -+ case uint32: -+ sc_w(t0, new_val, addr, release); ++ case BoolTest::ne: ++ bnez(op1, L, is_far); + break; + default: + ShouldNotReachHere(); + } +} + ++void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { ++ Label L; ++ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); ++ mv(dst, src); ++ bind(L); ++} + -+void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Register tmp1, Register tmp2, Register tmp3) { -+ assert(size == int8 || size == int16, "unsupported operand size"); -+ -+ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; -+ -+ andi(shift, addr, 3); -+ slli(shift, shift, 3); -+ -+ andi(aligned_addr, addr, ~3); ++// Set dst to NaN if any NaN input. ++void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min) { ++ assert_different_registers(dst, src1, src2); + -+ if (size == int8) { -+ addi(mask, zr, 0xff); ++ Label Done; ++ fsflags(zr); ++ if (is_double) { ++ is_min ? fmin_d(dst, src1, src2) ++ : fmax_d(dst, src1, src2); ++ // Checking NaNs ++ flt_d(zr, src1, src2); + } else { -+ // size == int16 case -+ addi(mask, zr, -1); -+ zero_extend(mask, mask, 16); ++ is_min ? fmin_s(dst, src1, src2) ++ : fmax_s(dst, src1, src2); ++ // Checking NaNs ++ flt_s(zr, src1, src2); + } -+ sll(mask, mask, shift); + -+ xori(not_mask, mask, -1); ++ frflags(t0); ++ beqz(t0, Done); + -+ sll(expected, expected, shift); -+ andr(expected, expected, mask); ++ // In case of NaNs ++ is_double ? fadd_d(dst, src1, src2) ++ : fadd_s(dst, src1, src2); + -+ sll(new_val, new_val, shift); -+ andr(new_val, new_val, mask); ++ bind(Done); +} + -+// cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps. 
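minmax_FD above handles NaN the way Java Math.min/max requires, rather than plain IEEE fmin/fmax: any NaN input must produce NaN, which is why a NaN detected via the accrued fflags falls back to an fadd of the two sources (NaN propagates through addition). A scalar sketch of that NaN handling, with signed-zero ordering left to fmin (java_min is illustrative only):

#include <cmath>

// Java-style min: NaN in, NaN out (IEEE fmin would return the non-NaN operand).
static double java_min(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) {
    return a + b;              // same trick as the fadd fallback: NaN propagates
  }
  return std::fmin(a, b);      // no NaN involved, ordinary minimum
}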
-+// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w, -+// which are forced to work with 4-byte aligned address. -+void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, bool result_as_bool, -+ Register tmp1, Register tmp2, Register tmp3) { -+ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; -+ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); -+ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); ++#endif // COMPILER2 + -+ Label retry, fail, done; +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +new file mode 100644 +index 0000000000..c660bce437 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -0,0 +1,966 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ bind(retry); -+ lr_w(old, aligned_addr, acquire); -+ andr(tmp, old, mask); -+ bne(tmp, expected, fail); ++#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP ++#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP + -+ andr(tmp, old, not_mask); -+ orr(tmp, tmp, new_val); -+ sc_w(tmp, tmp, aligned_addr, release); -+ bnez(tmp, retry); ++#include "asm/assembler.hpp" ++#include "metaprogramming/enableIf.hpp" + -+ if (result_as_bool) { -+ addi(result, zr, 1); -+ j(done); ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. + -+ bind(fail); -+ mv(result, zr); ++class MacroAssembler: public Assembler { + -+ bind(done); -+ } else { -+ andr(tmp, old, mask); ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) { ++ } ++ virtual ~MacroAssembler() {} + -+ bind(fail); -+ srl(result, tmp, shift); ++ void safepoint_poll(Label& slow_path); ++ void safepoint_poll_acquire(Label& slow_path); + -+ if (size == int8) { -+ sign_extend(result, result, 8); -+ } else { -+ // size == int16 case -+ sign_extend(result, result, 16); -+ } ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // swap_reg is killed. 
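As the comments above note, RISC-V only provides word/doubleword load-reserved/store-conditional, so a byte or halfword CAS has to be synthesized on the 4-byte-aligned word that contains the operand: compute a shift and mask for the sub-word lane, then run an lr.w/sc.w retry loop that splices the new value into that lane. A minimal, portable C++ sketch of the same shift/mask scheme (the helper name and its use of std::atomic are illustrative only, not part of the port):

#include <atomic>
#include <cstdint>

// CAS one byte (byte_index in [0, 3]) inside a 32-bit word by doing the
// compare-and-swap on the whole aligned word, as cmpxchg_narrow_value does.
bool cas_byte_in_word(std::atomic<uint32_t>& word, unsigned byte_index,
                      uint8_t expected, uint8_t new_val) {
  const unsigned shift = byte_index * 8;           // like slli(shift, shift, 3)
  const uint32_t mask  = 0xffu << shift;           // the 'mask' register
  const uint32_t exp   = uint32_t(expected) << shift;
  const uint32_t neu   = uint32_t(new_val)  << shift;

  uint32_t old = word.load(std::memory_order_relaxed);
  for (;;) {                                       // the lr.w ... sc.w retry loop
    if ((old & mask) != exp) {
      return false;                                // observed byte differs: fail
    }
    const uint32_t desired = (old & ~mask) | neu;  // keep the other bytes, splice in the new one
    if (word.compare_exchange_weak(old, desired)) {
      return true;                                 // store-conditional succeeded
    }
  }
}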
++ // tmp_reg must be supplied and must not be rscratch1 or rscratch2 ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. ++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL, ++ Register flag = noreg); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done, Register flag = noreg); ++ ++ // Helper functions for statistics gathering. ++ // Unconditional atomic increment. ++ void atomic_incw(Register counter_addr, Register tmp); ++ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { ++ la(tmp1, counter_addr); ++ atomic_incw(tmp1, tmp2); + } -+} + -+// weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement -+// the weak CAS stuff. The major difference is that it just failed when store conditional -+// failed. -+void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, -+ Register tmp1, Register tmp2, Register tmp3) { -+ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; -+ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); -+ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); ++ // Alignment ++ void align(int modulus, int extra_offset = 0); + -+ Label succ, fail, done; ++ // Stack frame creation/removal ++ // Note that SP must be updated to the right place before saving/restoring RA and FP ++ // because signal based thread suspend/resume could happen asynchronously. ++ void enter() { ++ addi(sp, sp, - 2 * wordSize); ++ sd(ra, Address(sp, wordSize)); ++ sd(fp, Address(sp)); ++ addi(fp, sp, 2 * wordSize); ++ } + -+ lr_w(old, aligned_addr, acquire); -+ andr(tmp, old, mask); -+ bne(tmp, expected, fail); ++ void leave() { ++ addi(sp, fp, - 2 * wordSize); ++ ld(fp, Address(sp)); ++ ld(ra, Address(sp, wordSize)); ++ addi(sp, sp, 2 * wordSize); ++ } + -+ andr(tmp, old, not_mask); -+ orr(tmp, tmp, new_val); -+ sc_w(tmp, tmp, aligned_addr, release); -+ beqz(tmp, succ); + -+ bind(fail); -+ addi(result, zr, 1); -+ j(done); ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); + -+ bind(succ); -+ mv(result, zr); ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
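For orientation, enter() above builds the two-slot frame that leave() later unwinds: it saves ra and the caller's fp just below the incoming sp and then points fp back at the incoming sp, so leave() can restore everything from fp even if sp has moved in the meantime. Roughly, assuming 8-byte words:

// Stack after enter() (stack grows downward; a sketch with wordSize == 8):
//
//   incoming sp ->  +----------------+  <- new fp (fp == incoming sp)
//                   |  saved ra      |     new sp + 8
//                   |  saved fp      |     new sp + 0
//   new sp      ->  +----------------+
//
// leave() recomputes sp as fp - 2*wordSize, reloads fp and ra, then pops both slots.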
+ -+ bind(done); -+} ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); + -+void MacroAssembler::cmpxchg(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, bool result_as_bool) { -+ assert(size != int8 && size != int16, "unsupported operand size"); ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); + -+ Label retry_load, done, ne_done; -+ bind(retry_load); -+ load_reserved(addr, size, acquire); -+ bne(t0, expected, ne_done); -+ store_conditional(addr, new_val, size, release); -+ bnez(t0, retry_load); ++ void get_vm_result(Register oop_result, Register java_thread); ++ void get_vm_result_2(Register metadata_result, Register java_thread); + -+ // equal, succeed -+ if (result_as_bool) { -+ li(result, 1); -+ } else { -+ mv(result, expected); -+ } -+ j(done); ++ // These always tightly bind to MacroAssembler::call_VM_leaf_base ++ // bypassing the virtual implementation ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_0); ++ void call_VM_leaf(address entry_point, ++ Register arg_0, Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_0, Register arg_1, Register arg_2); + -+ // not equal, failed -+ bind(ne_done); -+ if (result_as_bool) { -+ mv(result, zr); -+ } else { -+ mv(result, t0); -+ } ++ // These always tightly bind to MacroAssembler::call_VM_base ++ // bypassing the virtual implementation ++ void super_call_VM_leaf(address entry_point, Register arg_0); ++ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3); + -+ bind(done); -+} ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp); ++ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp); ++ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp); + -+void MacroAssembler::cmpxchg_weak(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result) { -+ Label fail, done, sc_done; -+ 
load_reserved(addr, size, acquire); -+ bne(t0, expected, fail); -+ store_conditional(addr, new_val, size, release); -+ beqz(t0, sc_done); ++ // thread in the default location (xthread) ++ void reset_last_Java_frame(bool clear_fp); + -+ // fail -+ bind(fail); -+ li(result, 1); -+ j(done); ++ virtual void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments to pop after the call ++ Label* retaddr = NULL ++ ); + -+ // sc_done -+ bind(sc_done); -+ mv(result, 0); -+ bind(done); -+} ++ virtual void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments to pop after the call ++ Label& retaddr) { ++ call_VM_leaf_base(entry_point, number_of_arguments, &retaddr); ++ } + -+#define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ -+void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ -+ prev = prev->is_valid() ? prev : zr; \ -+ if (incr.is_register()) { \ -+ AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ -+ } else { \ -+ mv(t0, incr.as_constant()); \ -+ AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ -+ } \ -+ return; \ -+} ++ virtual void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); + -+ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) -+ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions); + -+#undef ATOMIC_OP ++ virtual void check_and_handle_earlyret(Register java_thread); ++ virtual void check_and_handle_popframe(Register java_thread); + -+#define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ -+void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ -+ prev = prev->is_valid() ? 
prev : zr; \ -+ AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ -+ return; \ -+} ++ void resolve_oop_handle(Register result, Register tmp = x15); ++ void resolve_jobject(Register value, Register thread, Register tmp); + -+ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) -+ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) ++ void movoop(Register dst, jobject obj, bool immediate = false); ++ void mov_metadata(Register dst, Metadata* obj); ++ void bang_stack_size(Register size, Register tmp); ++ void set_narrow_oop(Register dst, jobject obj); ++ void set_narrow_klass(Register dst, Klass* k); + -+#undef ATOMIC_XCHG ++ void load_mirror(Register dst, Register method, Register tmp = x15); ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, ++ Address src, Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, ++ Register src, Register tmp1, Register thread_tmp); ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L); + -+#define ATOMIC_XCHGU(OP1, OP2) \ -+void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ -+ atomic_##OP2(prev, newv, addr); \ -+ zero_extend(prev, prev, 32); \ -+ return; \ -+} ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); ++ void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); ++ void decode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop(Register d, Register s); ++ void decode_heap_oop(Register r) { decode_heap_oop(r, r); } ++ void encode_heap_oop(Register d, Register s); ++ void encode_heap_oop(Register r) { encode_heap_oop(r, r); }; ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); + -+ATOMIC_XCHGU(xchgwu, xchgw) -+ATOMIC_XCHGU(xchgalwu, xchgalw) ++ void store_klass_gap(Register dst, Register src); + -+#undef ATOMIC_XCHGU ++ // currently unimplemented ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. ++ void store_heap_oop_null(Address dst); + -+void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { -+ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); -+ assert(CodeCache::find_blob(entry.target()) != NULL, -+ "destination of far call not found in code cache"); -+ int32_t offset = 0; -+ if (far_branches()) { -+ // We can use auipc + jalr here because we know that the total size of -+ // the code cache cannot exceed 2Gb. 
-+ la_patchable(tmp, entry, offset); -+ if (cbuf != NULL) { cbuf->set_insts_mark(); } -+ jalr(x0, tmp, offset); -+ } else { -+ if (cbuf != NULL) { cbuf->set_insts_mark(); } -+ j(entry); -+ } -+} ++ void load_prototype_header(Register dst, Register src); + -+void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { -+ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); -+ assert(CodeCache::find_blob(entry.target()) != NULL, -+ "destination of far call not found in code cache"); -+ int32_t offset = 0; -+ if (far_branches()) { -+ // We can use auipc + jalr here because we know that the total size of -+ // the code cache cannot exceed 2Gb. -+ la_patchable(tmp, entry, offset); -+ if (cbuf != NULL) { cbuf->set_insts_mark(); } -+ jalr(x1, tmp, offset); // link -+ } else { -+ if (cbuf != NULL) { cbuf->set_insts_mark(); } -+ jal(entry); // link -+ } -+} ++ // This dummy is to prevent a call to store_heap_oop from ++ // converting a zero (linke NULL) into a Register by giving ++ // the compiler two choices it can't resolve + -+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label* L_success, -+ Label* L_failure, -+ Label* L_slow_path, -+ Register super_check_offset) { -+ assert_different_registers(sub_klass, super_klass, tmp_reg); -+ bool must_load_sco = (super_check_offset == noreg); -+ if (must_load_sco) { -+ assert(tmp_reg != noreg, "supply either a temp or a register offset"); -+ } else { -+ assert_different_registers(sub_klass, super_klass, super_check_offset); -+ } ++ void store_heap_oop(Address dst, void* dummy); + -+ Label L_fallthrough; -+ int label_nulls = 0; -+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } -+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } -+ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } -+ assert(label_nulls <= 1, "at most one NULL in batch"); ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generateion is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). + -+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); -+ int sco_offset = in_bytes(Klass::super_check_offset_offset()); -+ Address super_check_offset_addr(super_klass, sco_offset); ++ virtual void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ static bool uses_implicit_null_check(void* address); + -+ // Hacked jmp, which may only be used just before L_fallthrough. -+#define final_jmp(label) \ -+ if (&(label) == &L_fallthrough) { /*do nothing*/ } \ -+ else j(label) /*omit semi*/ ++ // idiv variant which deals with MINLONG as dividend and -1 as divisor ++ int corrected_idivl(Register result, Register rs1, Register rs2, ++ bool want_remainder); ++ int corrected_idivq(Register result, Register rs1, Register rs2, ++ bool want_remainder); + -+ // If the pointers are equal, we are done (e.g., String[] elements). -+ // This self-check enables sharing of secondary supertype arrays among -+ // non-primary types such as array-of-interface. Otherwise, each such -+ // type would need its own customized SSA. 
-+ // We move this check to the front fo the fast path because many -+ // type checks are in fact trivially successful in this manner, -+ // so we get a nicely predicted branch right at the start of the check. -+ beq(sub_klass, super_klass, *L_success); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_tmp, ++ Label& no_such_interface, ++ bool return_method = true); + -+ // Check the supertype display: -+ if (must_load_sco) { -+ lwu(tmp_reg, super_check_offset_addr); -+ super_check_offset = tmp_reg; -+ } -+ add(t0, sub_klass, super_check_offset); -+ Address super_check_addr(t0); -+ ld(t0, super_check_addr); // load displayed supertype ++ // virtual method calling ++ // n.n. x86 allows RegisterOrConstant for vtable_index ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); + -+ // Ths check has worked decisively for primary supers. -+ // Secondary supers are sought in the super_cache ('super_cache_addr'). -+ // (Secondary supers are interfaces and very deeply nested subtypes.) -+ // This works in the same check above because of a tricky aliasing -+ // between the super_Cache and the primary super dispaly elements. -+ // (The 'super_check_addr' can address either, as the case requires.) -+ // Note that the cache is updated below if it does not help us find -+ // what we need immediately. -+ // So if it was a primary super, we can just fail immediately. -+ // Otherwise, it's the slow path for us (no success at this point). ++ // Form an addres from base + offset in Rd. Rd my or may not ++ // actually be used: you must use the Address that is returned. It ++ // is up to you to ensure that the shift provided mathces the size ++ // of your data. ++ Address form_address(Register Rd, Register base, long byte_offset); + -+ beq(super_klass, t0, *L_success); -+ mv(t1, sc_offset); -+ if (L_failure == &L_fallthrough) { -+ beq(super_check_offset, t1, *L_slow_path); -+ } else { -+ bne(super_check_offset, t1, *L_failure, /* is_far */ true); -+ final_jmp(*L_slow_path); -+ } ++ // allocation ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp1, // temp register ++ Register tmp2, // temp register ++ Label& slow_case, // continuation point of fast allocation fails ++ bool is_far = false ++ ); + -+ bind(L_fallthrough); ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp, // temp register ++ Label& slow_case, // continuation point if fast allocation fails ++ bool is_far = false ++ ); + -+#undef final_jmp -+} ++ // Test sub_klass against super_klass, with fast and slow paths. 
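Both halves of this check follow HotSpot's standard two-level subtype test: the fast path probes a single slot at super->super_check_offset (either the primary-supers display or the secondary-super cache), and only when that probe is inconclusive does the slow path scan the secondary supers linearly, caching any hit. A compact sketch of the equivalent logic, using simplified stand-in types rather than the real Klass layout:

// Stand-in type; field names and layout are illustrative, not the real HotSpot Klass.
struct SimpleKlass {
  int           super_check_offset;       // which slot the fast path probes
  SimpleKlass*  secondary_super_cache;    // last secondary super that matched
  SimpleKlass** secondary_supers;         // array of secondary supers
  int           secondary_supers_len;
  SimpleKlass*  primary_display[8];       // primary supers indexed by depth
};

bool is_subtype_of(SimpleKlass* sub, SimpleKlass* super,
                   int sc_offset /* offset of secondary_super_cache */) {
  if (sub == super) {
    return true;                                      // the leading self-check
  }
  SimpleKlass* probe = *(SimpleKlass**)((char*)sub + super->super_check_offset);
  if (probe == super) {
    return true;                                      // primary display (or cache) hit
  }
  if (super->super_check_offset != sc_offset) {
    return false;                                     // a primary-display miss is decisive
  }
  // Slow path: scan the secondary supers and cache a hit for next time.
  for (int i = 0; i < sub->secondary_supers_len; i++) {
    if (sub->secondary_supers[i] == super) {
      sub->secondary_super_cache = super;
      return true;
    }
  }
  return false;
}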
+ -+// Scans count pointer sized words at [addr] for occurence of value, -+// generic -+void MacroAssembler::repne_scan(Register addr, Register value, Register count, -+ Register tmp) { -+ Label Lloop, Lexit; -+ beqz(count, Lexit); -+ bind(Lloop); -+ ld(tmp, addr); -+ beq(value, tmp, Lexit); -+ add(addr, addr, wordSize); -+ sub(count, count, 1); -+ bnez(count, Lloop); -+ bind(Lexit); -+} ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except tmp_reg ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ Register super_check_offset = noreg); + -+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, -+ Register super_klass, -+ Register tmp1_reg, -+ Register tmp2_reg, -+ Label* L_success, -+ Label* L_failure) { -+ assert_different_registers(sub_klass, super_klass, tmp1_reg); -+ if (tmp2_reg != noreg) { -+ assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); -+ } -+#define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) ++ // The reset of the type cehck; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The tmp1_reg and tmp2_reg can be noreg, if no temps are avaliable. ++ // Updates the sub's secondary super cache as necessary. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register tmp1_reg, ++ Register tmp2_reg, ++ Label* L_success, ++ Label* L_failure); + -+ Label L_fallthrough; -+ int label_nulls = 0; -+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } -+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label& L_success); + -+ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + -+ // A couple of usefule fields in sub_klass: -+ int ss_offset = in_bytes(Klass::secondary_supers_offset()); -+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); -+ Address secondary_supers_addr(sub_klass, ss_offset); -+ Address super_cache_addr( sub_klass, sc_offset); ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char* s = "broken oop addr"); + -+ BLOCK_COMMENT("check_klass_subtype_slow_path"); ++ void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {} ++ void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {} + -+ // Do a linear scan of the secondary super-klass chain. -+ // This code is rarely used, so simplicity is a virtue here. -+ // The repne_scan instruction uses fixed registers, which we must spill. -+ // Don't worry too much about pre-existing connecitons with the input regs. 
++#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++#define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + -+ assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) -+ assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) ++ // A more convenient access to fence for our purposes ++ // We used four bit to indicate the read and write bits in the predecessors and successors, ++ // and extended i for r, o for w if UseConservativeFence enabled. ++ enum Membar_mask_bits { ++ StoreStore = 0b0101, // (pred = ow + succ = ow) ++ LoadStore = 0b1001, // (pred = ir + succ = ow) ++ StoreLoad = 0b0110, // (pred = ow + succ = ir) ++ LoadLoad = 0b1010, // (pred = ir + succ = ir) ++ AnyAny = LoadStore | StoreLoad // (pred = iorw + succ = iorw) ++ }; + -+ RegSet pushed_registers; -+ if (!IS_A_TEMP(x12)) { -+ pushed_registers += x12; -+ } -+ if (!IS_A_TEMP(x15)) { -+ pushed_registers += x15; -+ } ++ void membar(uint32_t order_constraint); + -+ if (super_klass != x10 || UseCompressedOops) { -+ if (!IS_A_TEMP(x10)) { -+ pushed_registers += x10; ++ static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) { ++ predecessor = (order_constraint >> 2) & 0x3; ++ successor = order_constraint & 0x3; ++ ++ // extend rw -> iorw: ++ // 01(w) -> 0101(ow) ++ // 10(r) -> 1010(ir) ++ // 11(rw)-> 1111(iorw) ++ if (UseConservativeFence) { ++ predecessor |= predecessor << 2; ++ successor |= successor << 2; + } + } + -+ push_reg(pushed_registers, sp); ++ static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) { ++ return ((predecessor & 0x3) << 2) | (successor & 0x3); ++ } + -+ // Get super_klass value into x10 (even if it was in x15 or x12) -+ mv(x10, super_klass); ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); + -+#ifndef PRODUCT -+ mv(t1, (address)&SharedRuntime::_partial_subtype_ctr); -+ Address pst_counter_addr(t1); -+ ld(t0, pst_counter_addr); -+ add(t0, t0, 1); -+ sd(t0, pst_counter_addr); -+#endif // PRODUCT ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); + -+ // We will consult the secondary-super array. -+ ld(x15, secondary_supers_addr); -+ // Load the array length. -+ lwu(x12, Address(x15, Array::length_offset_in_bytes())); -+ // Skip to start of data. -+ add(x15, x15, Array::base_offset_in_bytes()); ++ void unimplemented(const char* what = ""); + -+ // Set t0 to an obvious invalid value, falling through by default -+ li(t0, -1); -+ // Scan X12 words at [X15] for an occurrence of X10. -+ repne_scan(x15, x10, x12, t0); ++ void should_not_reach_here() { stop("should not reach here"); } + -+ // pop will restore x10, so we should use a temp register to keep its value -+ mv(t1, x10); ++ static address target_addr_for_insn(address insn_addr); + -+ // Unspill the temp registers: -+ pop_reg(pushed_registers, sp); ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. 
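The Membar_mask_bits values above pack the fence's predecessor set into bits 3:2 and its successor set into bits 1:0, with r encoded as 0b10 and w as 0b01; membar_mask_to_pred_succ simply unpacks them and, when UseConservativeFence is set, widens w to ow and r to ir. A standalone sketch of the same packing, with StoreLoad worked as an example:

#include <cstdint>
#include <cstdio>

enum { W_BIT = 0b01, R_BIT = 0b10 };   // same bit meanings as the enum above

void mask_to_pred_succ(uint32_t order, uint32_t& pred, uint32_t& succ, bool conservative) {
  pred = (order >> 2) & 0x3;           // predecessor set
  succ = order & 0x3;                  // successor set
  if (conservative) {                  // widen: w -> ow, r -> ir, rw -> iorw
    pred |= pred << 2;
    succ |= succ << 2;
  }
}

int main() {
  const uint32_t StoreLoad = (W_BIT << 2) | R_BIT;        // 0b0110, as in the enum above
  uint32_t pred = 0, succ = 0;
  mask_to_pred_succ(StoreLoad, pred, succ, /*conservative=*/false);
  std::printf("fence pred=%#x succ=%#x\n", pred, succ);   // pred = w (0b01), succ = r (0b10)
  return 0;
}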
++ static int pd_patch_instruction_size(address branch, address target); ++ static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) { ++ pd_patch_instruction_size(branch, target); ++ } ++ static address pd_call_destination(address branch) { ++ return target_addr_for_insn(branch); ++ } + -+ bne(t1, t0, *L_failure); ++ static int patch_oop(address insn_addr, address o); ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ void emit_static_call_stub(); + -+ // Success. Cache the super we found an proceed in triumph. -+ sd(super_klass, super_cache_addr); ++ // The following 4 methods return the offset of the appropriate move instruction + -+ if (L_success != &L_fallthrough) { -+ j(*L_success); -+ } ++ // Support for fast byte/short loading with zero extension (depending on particular CPU) ++ int load_unsigned_byte(Register dst, Address src); ++ int load_unsigned_short(Register dst, Address src); + -+#undef IS_A_TEMP ++ // Support for fast byte/short loading with sign extension (depending on particular CPU) ++ int load_signed_byte(Register dst, Address src); ++ int load_signed_short(Register dst, Address src); + -+ bind(L_fallthrough); -+} ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + -+// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. -+void MacroAssembler::tlab_allocate(Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1, -+ Register tmp2, -+ Label& slow_case, -+ bool is_far) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); -+} ++ public: ++ // Standard pseudoinstruction ++ void nop(); ++ void mv(Register Rd, Register Rs); ++ void notr(Register Rd, Register Rs); ++ void neg(Register Rd, Register Rs); ++ void negw(Register Rd, Register Rs); ++ void sext_w(Register Rd, Register Rs); ++ void zext_b(Register Rd, Register Rs); ++ void seqz(Register Rd, Register Rs); // set if = zero ++ void snez(Register Rd, Register Rs); // set if != zero ++ void sltz(Register Rd, Register Rs); // set if < zero ++ void sgtz(Register Rd, Register Rs); // set if > zero + -+// Defines obj, preserves var_size_in_bytes -+void MacroAssembler::eden_allocate(Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp, -+ Label& slow_case, -+ bool is_far) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp, slow_case, is_far); -+} ++ // Float pseudoinstruction ++ void fmv_s(FloatRegister Rd, FloatRegister Rs); ++ void fabs_s(FloatRegister Rd, FloatRegister Rs); // single-precision absolute value ++ void fneg_s(FloatRegister Rd, FloatRegister Rs); + ++ // Double pseudoinstruction ++ void fmv_d(FloatRegister Rd, FloatRegister Rs); ++ void fabs_d(FloatRegister Rd, FloatRegister Rs); ++ void fneg_d(FloatRegister Rd, FloatRegister Rs); + -+// get_thread() can be called anywhere inside generated code so we -+// need to save whatever non-callee save context might get clobbered -+// by the call to Thread::current() or, indeed, the call setup code. 
-+void MacroAssembler::get_thread(Register thread) { -+ // save all call-clobbered regs except thread -+ RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + -+ RegSet::range(x28, x31) + ra - thread; -+ push_reg(saved_regs, sp); ++ // Pseudoinstruction for control and status register ++ void rdinstret(Register Rd); // read instruction-retired counter ++ void rdcycle(Register Rd); // read cycle counter ++ void rdtime(Register Rd); // read time ++ void csrr(Register Rd, unsigned csr); // read csr ++ void csrw(unsigned csr, Register Rs); // write csr ++ void csrs(unsigned csr, Register Rs); // set bits in csr ++ void csrc(unsigned csr, Register Rs); // clear bits in csr ++ void csrwi(unsigned csr, unsigned imm); ++ void csrsi(unsigned csr, unsigned imm); ++ void csrci(unsigned csr, unsigned imm); ++ void frcsr(Register Rd); // read float-point csr ++ void fscsr(Register Rd, Register Rs); // swap float-point csr ++ void fscsr(Register Rs); // write float-point csr ++ void frrm(Register Rd); // read float-point rounding mode ++ void fsrm(Register Rd, Register Rs); // swap float-point rounding mode ++ void fsrm(Register Rs); // write float-point rounding mode ++ void fsrmi(Register Rd, unsigned imm); ++ void fsrmi(unsigned imm); ++ void frflags(Register Rd); // read float-point exception flags ++ void fsflags(Register Rd, Register Rs); // swap float-point exception flags ++ void fsflags(Register Rs); // write float-point exception flags ++ void fsflagsi(Register Rd, unsigned imm); ++ void fsflagsi(unsigned imm); + -+ int32_t offset = 0; -+ movptr_with_offset(ra, CAST_FROM_FN_PTR(address, Thread::current), offset); -+ jalr(ra, ra, offset); -+ if (thread != x10) { -+ mv(thread, x10); -+ } ++ void beqz(Register Rs, const address &dest); ++ void bnez(Register Rs, const address &dest); ++ void blez(Register Rs, const address &dest); ++ void bgez(Register Rs, const address &dest); ++ void bltz(Register Rs, const address &dest); ++ void bgtz(Register Rs, const address &dest); ++ void la(Register Rd, Label &label); ++ void la(Register Rd, const address &dest); ++ void la(Register Rd, const Address &adr); ++ //label ++ void beqz(Register Rs, Label &l, bool is_far = false); ++ void bnez(Register Rs, Label &l, bool is_far = false); ++ void blez(Register Rs, Label &l, bool is_far = false); ++ void bgez(Register Rs, Label &l, bool is_far = false); ++ void bltz(Register Rs, Label &l, bool is_far = false); ++ void bgtz(Register Rs, Label &l, bool is_far = false); ++ void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void 
double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + -+ // restore pushed registers -+ pop_reg(saved_regs, sp); -+} ++ void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } } ++ void pop_reg(RegSet regs, Register stack) { if (regs.bits()) { pop_reg(regs.bits(), stack); } } ++ void push_reg(Register Rs); ++ void pop_reg(Register Rd); ++ int push_reg(unsigned int bitset, Register stack); ++ int pop_reg(unsigned int bitset, Register stack); + -+void MacroAssembler::load_byte_map_base(Register reg) { -+ CardTable::CardValue* byte_map_base = -+ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); -+ li(reg, (uint64_t)byte_map_base); -+} ++ // Push and pop everything that might be clobbered by a native ++ // runtime call except t0 and t1. (They are always ++ // temporary registers, so we don't have to protect them.) ++ // Additional registers can be excluded in a passed RegSet. ++ void push_call_clobbered_registers_except(RegSet exclude); ++ void pop_call_clobbered_registers_except(RegSet exclude); + -+void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) { -+ relocInfo::relocType rtype = dest.rspec().reloc()->type(); -+ unsigned long low_address = (uintptr_t)CodeCache::low_bound(); -+ unsigned long high_address = (uintptr_t)CodeCache::high_bound(); -+ unsigned long dest_address = (uintptr_t)dest.target(); -+ long offset_low = dest_address - low_address; -+ long offset_high = dest_address - high_address; ++ void push_call_clobbered_registers() { ++ push_call_clobbered_registers_except(RegSet()); ++ } ++ void pop_call_clobbered_registers() { ++ pop_call_clobbered_registers_except(RegSet()); ++ } + -+ assert(is_valid_riscv64_address(dest.target()), "bad address"); -+ assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); ++ void pusha(); ++ void popa(); ++ void push_CPU_state(); ++ void pop_CPU_state(); + -+ InstructionMark im(this); -+ code_section()->relocate(inst_mark(), dest.rspec()); -+ // RISC-V doesn't compute a page-aligned address, in order to partially -+ // compensate for the use of *signed* offsets in its base+disp12 -+ // addressing mode (RISC-V's PC-relative reach remains asymmetric -+ // [-(2G + 2K), 2G - 2k). 
-+ if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { -+ int64_t distance = dest.target() - pc(); -+ auipc(reg1, (int32_t)distance + 0x800); -+ offset = ((int32_t)distance << 20) >> 20; -+ } else { -+ movptr_with_offset(reg1, dest.target(), offset); ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ void bind(Label& L) { ++ Assembler::bind(L); ++ // fences across basic blocks should not be merged ++ code()->clear_last_insn(); + } -+} + -+void MacroAssembler::build_frame(int framesize) { -+ assert(framesize >= 2, "framesize must include space for FP/RA"); -+ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); -+ sub(sp, sp, framesize); -+ sd(fp, Address(sp, framesize - 2 * wordSize)); -+ sd(ra, Address(sp, framesize - wordSize)); -+ if (PreserveFramePointer) { add(fp, sp, framesize); } -+ verify_cross_modify_fence_not_required(); -+} ++ // mv ++ inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned int imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned long long imm64) { li(Rd, (int64_t)imm64); } + -+void MacroAssembler::remove_frame(int framesize) { -+ assert(framesize >= 2, "framesize must include space for FP/RA"); -+ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); -+ ld(fp, Address(sp, framesize - 2 * wordSize)); -+ ld(ra, Address(sp, framesize - wordSize)); -+ add(sp, sp, framesize); -+} ++ inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } + -+void MacroAssembler::reserved_stack_check() { -+ // testing if reserved zone needs to be enabled -+ Label no_reserved_zone_enabling; ++ void mv(Register Rd, Address dest); ++ void mv(Register Rd, address dest); ++ void mv(Register Rd, RegisterOrConstant src); + -+ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); -+ bltu(sp, t0, no_reserved_zone_enabling); ++ // logic ++ void andrw(Register Rd, Register Rs1, Register Rs2); ++ void orrw(Register Rd, Register Rs1, Register Rs2); ++ void xorrw(Register Rd, Register Rs1, Register Rs2); + -+ enter(); // RA and FP are live. 
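The la_patchable sequence above relies on the usual RISC-V hi20/lo12 split: auipc consumes the upper 20 bits of (distance + 0x800), the following 12-bit-immediate instruction supplies the sign-extended low 12 bits, and the two always add back up to the original displacement; the 0x800 rounding compensates for the low part being sign-extended. A small self-contained check of that arithmetic (assuming, as the code above does, that auipc keeps only bits 31:12 of its argument):

#include <cassert>
#include <cstdint>

// Split a displacement the way la_patchable does and verify it recomposes exactly.
void split_offset(int64_t distance, int64_t& hi20, int32_t& lo12) {
  lo12 = (int32_t)(((distance & 0xfff) ^ 0x800) - 0x800);  // sign-extended low 12 bits
  hi20 = (distance + 0x800) & ~int64_t(0xfff);             // what auipc actually adds to pc
  assert(hi20 + lo12 == distance);
}

int main() {
  int64_t hi = 0;
  int32_t lo = 0;
  split_offset(0x12345fff, hi, lo);   // lo becomes -1, hi becomes 0x12346000
  split_offset(-0x1000, hi, lo);      // lo becomes 0,  hi becomes -0x1000
  return 0;
}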
-+ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset); -+ jalr(x1, t0, offset); -+ leave(); ++ // revb ++ void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend ++ void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend ++ void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend ++ void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend ++ void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower ++ void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword ++ void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word ++ void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword + -+ // We have already removed our own frame. -+ // throw_delayed_StackOverflowError will think that it's been -+ // called by our caller. -+ offset = 0; -+ la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset); -+ jalr(x0, t0, offset); -+ should_not_reach_here(); ++ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); ++ void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0); ++ void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); + -+ bind(no_reserved_zone_enabling); -+} ++ void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail); ++ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail); ++ void cmpxchg(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool = false); ++ void cmpxchg_weak(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result); ++ void cmpxchg_narrow_value_helper(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Register tmp1, Register tmp2, Register tmp3); ++ void cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool, ++ Register tmp1, Register tmp2, Register tmp3); ++ void weak_cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, ++ Register tmp1, Register tmp2, Register tmp3); + -+// Move the address of the polling page into dest. 
-+void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { -+ ld(dest, Address(xthread, JavaThread::polling_page_offset())); -+} ++ void atomic_add(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addw(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addal(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr); + -+// Read the polling page. The address of the polling page must -+// already be in r. -+address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { -+ address mark; -+ { -+ InstructionMark im(this); -+ code_section()->relocate(inst_mark(), rtype); -+ lwu(zr, Address(r, offset)); -+ mark = inst_mark(); -+ } -+ verify_cross_modify_fence_not_required(); -+ return mark; -+} ++ void atomic_xchg(Register prev, Register newv, Register addr); ++ void atomic_xchgw(Register prev, Register newv, Register addr); ++ void atomic_xchgal(Register prev, Register newv, Register addr); ++ void atomic_xchgalw(Register prev, Register newv, Register addr); ++ void atomic_xchgwu(Register prev, Register newv, Register addr); ++ void atomic_xchgalwu(Register prev, Register newv, Register addr); + -+void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { -+#ifdef ASSERT -+ { -+ ThreadInVMfromUnknown tiv; -+ assert (UseCompressedOops, "should only be used for compressed oops"); -+ assert (Universe::heap() != NULL, "java heap should be initialized"); -+ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); -+ assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); ++ static bool far_branches() { ++ return ReservedCodeCacheSize > branch_range; + } -+#endif -+ int oop_index = oop_recorder()->find_index(obj); -+ InstructionMark im(this); -+ RelocationHolder rspec = oop_Relocation::spec(oop_index); -+ code_section()->relocate(inst_mark(), rspec); -+ li32(dst, 0xDEADBEEF); -+ zero_extend(dst, dst, 32); -+} -+ -+void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { -+ assert (UseCompressedClassPointers, "should only be used for compressed headers"); -+ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); -+ int index = oop_recorder()->find_index(k); -+ assert(!Universe::heap()->is_in(k), "should not be an oop"); -+ -+ InstructionMark im(this); -+ RelocationHolder rspec = metadata_Relocation::spec(index); -+ code_section()->relocate(inst_mark(), rspec); -+ narrowKlass nk = CompressedKlassPointers::encode(k); -+ li32(dst, nk); -+ zero_extend(dst, dst, 32); -+} + -+// Maybe emit a call via a trampoline. If the code cache is small -+// trampolines won't be emitted. -+address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { -+ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); -+ assert(entry.rspec().type() == relocInfo::runtime_call_type || -+ entry.rspec().type() == relocInfo::opt_virtual_call_type || -+ entry.rspec().type() == relocInfo::static_call_type || -+ entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ // Jumps that can reach anywhere in the code cache. ++ // Trashes tmp. ++ void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); ++ void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); + -+ // We need a trampoline if branches are far. 
-+ if (far_branches()) { -+ bool in_scratch_emit_size = false; -+#ifdef COMPILER2 -+ // We don't want to emit a trampoline if C2 is generating dummy -+ // code during its branch shortening phase. -+ CompileTask* task = ciEnv::current()->task(); -+ in_scratch_emit_size = -+ (task != NULL && is_c2_compile(task->comp_level()) && -+ Compile::current()->output()->in_scratch_emit_size()); -+#endif -+ if (!in_scratch_emit_size) { -+ address stub = emit_trampoline_stub(offset(), entry.target()); -+ if (stub == NULL) { -+ postcond(pc() == badAddress); -+ return NULL; // CodeCache is full -+ } ++ static int far_branch_size() { ++ if (far_branches()) { ++ return 2 * 4; // auipc + jalr, see far_call() & far_jump() ++ } else { ++ return 4; + } + } + -+ if (cbuf != NULL) { cbuf->set_insts_mark(); } -+ relocate(entry.rspec()); -+ if (!far_branches()) { -+ jal(entry.target()); -+ } else { -+ jal(pc()); -+ } -+ // just need to return a non-null address -+ postcond(pc() != badAddress); -+ return pc(); -+} ++ void load_byte_map_base(Register reg); + -+address MacroAssembler::ic_call(address entry, jint method_index) { -+ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); -+ movptr(t1, (address)Universe::non_oop_word()); -+ assert_cond(entry != NULL); -+ return trampoline_call(Address(entry, rh)); -+} ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ sub(t0, sp, offset); ++ sd(zr, Address(t0)); ++ } + -+// Emit a trampoline stub for a call to a target which is too far away. -+// -+// code sequences: -+// -+// call-site: -+// branch-and-link to or -+// -+// Related trampoline stub for this call site in the stub section: -+// load the call target from the constant pool -+// branch (RA still points to the call site above) ++ void la_patchable(Register reg1, const Address &dest, int32_t &offset); + -+address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, -+ address dest) { -+ address stub = start_a_stub(NativeInstruction::instruction_size -+ + NativeCallTrampolineStub::instruction_size); -+ if (stub == NULL) { -+ return NULL; // CodeBuffer::expand failed ++ virtual void _call_Unimplemented(address call_site) { ++ mv(t1, call_site); + } + -+ // Create a trampoline stub relocation which relates this trampoline stub -+ // with the call instruction at insts_call_instruction_offset in the -+ // instructions code-section. -+ -+ // make sure 4 byte aligned here, so that the destination address would be -+ // 8 byte aligned after 3 intructions -+ // when we reach here we may get a 2-byte alignment so need to align it -+ align(wordSize, NativeCallTrampolineStub::data_offset); ++ #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) + -+ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + -+ insts_call_instruction_offset)); -+ const int stub_start_offset = offset(); ++ // Frame creation and destruction shared between JITs. 
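When far_branches() is true, the call site branch-and-links to a small trampoline in the stub section that loads the real destination from an inline 64-bit word and jumps to it, so later patching only has to rewrite that data word. Going by the emission code above, the stub has roughly this shape (offsets assume the alignment established just before the data word):

// Trampoline stub emitted by emit_trampoline_stub() (a sketch):
//
//   stub + 0:   auipc t0, %hi(stub + 12)      // 'ld(t0, target)' expands to auipc + ld
//   stub + 4:   ld    t0, %lo(stub + 12)(t0)
//   stub + 8:   jalr  x0, 0(t0)               // 'jr(t0)'; ra still points at the call site
//   stub + 12:  .8byte <destination address>  // the word that gets patched
//
// far_call()/far_jump() avoid the stub entirely: with the code cache under 2 GiB they
// reach any target with auipc + jalr, which is why far_branch_size() is two instructions.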
++ void build_frame(int framesize); ++ void remove_frame(int framesize); + -+ // Now, create the trampoline stub's code: -+ // - load the call -+ // - call -+ Label target; -+ ld(t0, target); // auipc + ld -+ jr(t0); // jalr -+ bind(target); -+ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, -+ "should be"); -+ assert(offset() % wordSize == 0, "bad alignment"); -+ emit_int64((intptr_t)dest); ++ void reserved_stack_check(); + -+ const address stub_start_addr = addr_at(stub_start_offset); ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); + -+ assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); ++ void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); ++ void read_polling_page(Register r, address page, relocInfo::relocType rtype); ++ void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); + -+ end_a_stub(); -+ return stub_start_addr; -+} ++ address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); ++ address ic_call(address entry, jint method_index = 0); + -+Address MacroAssembler::add_memory_helper(const Address dst) { -+ switch (dst.getMode()) { -+ case Address::base_plus_offset: -+ // This is the expected mode, although we allow all the other -+ // forms below. -+ return form_address(t1, dst.base(), dst.offset()); -+ default: -+ la(t1, dst); -+ return Address(t1); -+ } -+} ++ void add_memory_int64(const Address dst, int64_t imm); ++ void add_memory_int32(const Address dst, int32_t imm); + -+void MacroAssembler::add_memory_int64(const Address dst, int64_t imm) { -+ Address adr = add_memory_helper(dst); -+ assert_different_registers(adr.base(), t0); -+ ld(t0, adr); -+ addi(t0, t0, imm); -+ sd(t0, adr); -+} ++ void cmpptr(Register src1, Address src2, Label& equal); + -+void MacroAssembler::add_memory_int32(const Address dst, int32_t imm) { -+ Address adr = add_memory_helper(dst); -+ assert_different_registers(adr.base(), t0); -+ lwu(t0, adr); -+ addiw(t0, t0, imm); -+ sw(t0, adr); -+} ++ void compute_index(Register str1, Register trailing_zeros, Register match_mask, ++ Register result, Register char_tmp, Register tmp, ++ bool haystack_isL); ++ void compute_match_mask(Register src, Register pattern, Register match_mask, ++ Register mask1, Register mask2); + -+void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { -+ assert_different_registers(src1, t0); -+ int32_t offset; -+ la_patchable(t0, src2, offset); -+ ld(t0, Address(t0, offset)); -+ beq(src1, t0, equal); -+} ++#ifdef COMPILER2 ++ void mul_add(Register out, Register in, Register offset, ++ Register len, Register k, Register tmp); ++ void cad(Register dst, Register src1, Register src2, Register carry); ++ void cadc(Register dst, Register src1, Register src2, Register carry); ++ void adc(Register dst, Register src1, Register src2, Register carry); ++ void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, ++ Register src1, Register src2, Register carry); ++ void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx); ++ void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx); ++ void multiply_128_x_128_loop(Register y, Register z, ++ Register 
carry, Register carry2, ++ Register idx, Register jdx, ++ Register yz_idx1, Register yz_idx2, ++ Register tmp, Register tmp3, Register tmp4, ++ Register tmp6, Register product_hi); ++ void multiply_to_len(Register x, Register xlen, Register y, Register ylen, ++ Register z, Register zlen, ++ Register tmp1, Register tmp2, Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, Register product_hi); ++#endif + -+void MacroAssembler::load_method_holder_cld(Register result, Register method) { -+ load_method_holder(result, method); -+ ld(result, Address(result, InstanceKlass::class_loader_data_offset())); -+} ++ void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); ++ void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); + -+void MacroAssembler::load_method_holder(Register holder, Register method) { -+ ld(holder, Address(method, Method::const_offset())); // ConstMethod* -+ ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* -+ ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* -+} ++ void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1); + -+// string indexof -+// compute index by trailing zeros -+void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, -+ Register match_mask, Register result, -+ Register ch2, Register tmp, -+ bool haystack_isL) -+{ -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ srl(match_mask, match_mask, trailing_zeros); -+ srli(match_mask, match_mask, 1); -+ srli(tmp, trailing_zeros, LogBitsPerByte); -+ if (!haystack_isL) andi(tmp, tmp, 0xE); -+ add(haystack, haystack, tmp); -+ ld(ch2, Address(haystack)); -+ if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); -+ add(result, result, tmp); -+} ++ void zero_words(Register base, u_int64_t cnt); ++ address zero_words(Register ptr, Register cnt); ++ void fill_words(Register base, Register cnt, Register value); ++ void zero_memory(Register addr, Register len, Register tmp); + -+// string indexof -+// Find pattern element in src, compute match mask, -+// only the first occurrence of 0x80/0x8000 at low bits is the valid match index -+// match mask patterns and corresponding indices would be like: -+// - 0x8080808080808080 (Latin1) -+// - 7 6 5 4 3 2 1 0 (match index) -+// - 0x8000800080008000 (UTF16) -+// - 3 2 1 0 (match index) -+void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, -+ Register mask1, Register mask2) -+{ -+ xorr(src, pattern, src); -+ sub(match_mask, src, mask1); -+ orr(src, src, mask2); -+ notr(src, src); -+ andr(match_mask, match_mask, src); -+} ++ // shift left by shamt and add ++ void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt); + -+#ifdef COMPILER2 -+// Code for BigInteger::mulAdd instrinsic -+// out = x10 -+// in = x11 -+// offset = x12 (already out.length-offset) -+// len = x13 -+// k = x14 -+// tmp = x28 -+// -+// pseudo code from java implementation: -+// long kLong = k & LONG_MASK; -+// carry = 0; -+// offset = out.length-offset - 1; -+// for (int j = len - 1; j >= 0; j--) { -+// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; -+// out[offset--] = (int)product; -+// carry = product >>> 32; -+// } -+// return (int)carry; -+void MacroAssembler::mul_add(Register out, Register in, Register offset, -+ Register len, Register k, Register tmp) { -+ Label L_tail_loop, L_unroll, L_end; -+ mv(tmp, out); -+ mv(out, 
zr); -+ blez(len, L_end); -+ zero_extend(k, k, 32); -+ slliw(t0, offset, LogBytesPerInt); -+ add(offset, tmp, t0); -+ slliw(t0, len, LogBytesPerInt); -+ add(in, in, t0); ++ // Here the float instructions with safe deal with some exceptions. ++ // e.g. convert from NaN, +Inf, -Inf to int, float, double ++ // will trigger exception, we need to deal with these situations ++ // to get correct results. ++ void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0); ++ void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0); ++ void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0); ++ void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0); + -+ const int unroll = 8; -+ li(tmp, unroll); -+ blt(len, tmp, L_tail_loop); -+ bind(L_unroll); -+ for (int i = 0; i < unroll; i++) { -+ sub(in, in, BytesPerInt); -+ lwu(t0, Address(in, 0)); -+ mul(t1, t0, k); -+ add(t0, t1, out); -+ sub(offset, offset, BytesPerInt); -+ lwu(t1, Address(offset, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(offset, 0)); -+ srli(out, t0, 32); ++ // vector load/store unit-stride instructions ++ void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { ++ switch (sew) { ++ case Assembler::e64: ++ vle64_v(vd, base, vm); ++ break; ++ case Assembler::e32: ++ vle32_v(vd, base, vm); ++ break; ++ case Assembler::e16: ++ vle16_v(vd, base, vm); ++ break; ++ case Assembler::e8: // fall through ++ default: ++ vle8_v(vd, base, vm); ++ break; ++ } + } -+ subw(len, len, tmp); -+ bge(len, tmp, L_unroll); + -+ bind(L_tail_loop); -+ blez(len, L_end); -+ sub(in, in, BytesPerInt); -+ lwu(t0, Address(in, 0)); -+ mul(t1, t0, k); -+ add(t0, t1, out); -+ sub(offset, offset, BytesPerInt); -+ lwu(t1, Address(offset, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(offset, 0)); -+ srli(out, t0, 32); -+ subw(len, len, 1); -+ j(L_tail_loop); ++ void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { ++ switch (sew) { ++ case Assembler::e64: ++ vse64_v(store_data, base, vm); ++ break; ++ case Assembler::e32: ++ vse32_v(store_data, base, vm); ++ break; ++ case Assembler::e16: ++ vse16_v(store_data, base, vm); ++ break; ++ case Assembler::e8: // fall through ++ default: ++ vse8_v(store_data, base, vm); ++ break; ++ } ++ } + -+ bind(L_end); -+} ++ static const int zero_words_block_size; + -+// add two unsigned input and output carry -+void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, carry); -+ assert_different_registers(dst, src2); -+ add(dst, src1, src2); -+ sltu(carry, dst, src2); -+} ++ void cast_primitive_type(BasicType type, Register Rt) { ++ switch (type) { ++ case T_BOOLEAN: ++ sltu(Rt, zr, Rt); ++ break; ++ case T_CHAR : ++ zero_extend(Rt, Rt, 16); ++ break; ++ case T_BYTE : ++ sign_extend(Rt, Rt, 8); ++ break; ++ case T_SHORT : ++ sign_extend(Rt, Rt, 16); ++ break; ++ case T_INT : ++ addw(Rt, Rt, zr); ++ break; ++ case T_LONG : /* nothing to do */ break; ++ case T_VOID : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ } + -+// add two input with carry -+void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, carry); -+ add(dst, src1, src2); -+ add(dst, dst, carry); -+} ++ // float cmp with unordered_result ++ void float_compare(Register result, FloatRegister Rs1, 
FloatRegister Rs2, int unordered_result); ++ void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); + -+// add two unsigned input with carry and output carry -+void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, src2); -+ adc(dst, src1, src2, carry); -+ sltu(carry, dst, src2); -+} ++ // Zero/Sign-extend ++ void zero_extend(Register dst, Register src, int bits); ++ void sign_extend(Register dst, Register src, int bits); + -+void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, -+ Register src1, Register src2, Register carry) -+{ -+ cad(dest_lo, dest_lo, src1, carry); -+ add(dest_hi, dest_hi, carry); -+ cad(dest_lo, dest_lo, src2, carry); -+ add(final_dest_hi, dest_hi, carry); -+} ++ // compare src1 and src2 and get -1/0/1 in dst. ++ // if [src1 > src2], dst = 1; ++ // if [src1 == src2], dst = 0; ++ // if [src1 < src2], dst = -1; ++ void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0); + -+/** -+ * Multiply 32 bit by 32 bit first loop. -+ */ -+void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx) -+{ -+ // jlong carry, x[], y[], z[]; -+ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { -+ // long product = y[idx] * x[xstart] + carry; -+ // z[kdx] = (int)product; -+ // carry = product >>> 32; -+ // } -+ // z[xstart] = (int)carry; ++ int push_fp(unsigned int bitset, Register stack); ++ int pop_fp(unsigned int bitset, Register stack); + -+ Label L_first_loop, L_first_loop_exit; -+ blez(idx, L_first_loop_exit); ++ // vext ++ void vmnot_m(VectorRegister vd, VectorRegister vs); ++ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); ++ void vfneg_v(VectorRegister vd, VectorRegister vs); + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ lwu(x_xstart, Address(t0, 0)); ++private: + -+ bind(L_first_loop); -+ subw(idx, idx, 1); -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ lwu(y_idx, Address(t0, 0)); -+ mul(product, x_xstart, y_idx); -+ add(product, product, carry); -+ srli(carry, product, 32); -+ subw(kdx, kdx, 1); -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(product, Address(t0, 0)); -+ bgtz(idx, L_first_loop); ++#ifdef ASSERT ++ // Macro short-hand support to clean-up after a failed call to trampoline ++ // call generation (see trampoline_call() below), when a set of Labels must ++ // be reset (before returning). ++#define reset_labels1(L1) L1.reset() ++#define reset_labels2(L1, L2) L1.reset(); L2.reset() ++#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) ++#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) ++#endif ++ void repne_scan(Register addr, Register value, Register count, Register tmp); + -+ bind(L_first_loop_exit); -+} ++ // Return true if an address is within the 48-bit RISCV64 address space. ++ bool is_valid_riscv64_address(address addr) { ++ // sv48: must have bits 63–48 all equal to bit 47 ++ return ((uintptr_t)addr >> 47) == 0; ++ } + -+/** -+ * Multiply 64 bit by 64 bit first loop. 
-+ */ -+void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx) -+{ -+ // -+ // jlong carry, x[], y[], z[]; -+ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { -+ // huge_128 product = y[idx] * x[xstart] + carry; -+ // z[kdx] = (jlong)product; -+ // carry = (jlong)(product >>> 64); -+ // } -+ // z[xstart] = carry; -+ // ++ void ld_constant(Register dest, const Address &const_addr) { ++ if (NearCpool) { ++ ld(dest, const_addr); ++ } else { ++ int32_t offset = 0; ++ la_patchable(dest, InternalAddress(const_addr.target()), offset); ++ ld(dest, Address(dest, offset)); ++ } ++ } + -+ Label L_first_loop, L_first_loop_exit; -+ Label L_one_x, L_one_y, L_multiply; ++ int bitset_to_regs(unsigned int bitset, unsigned char* regs); ++ Address add_memory_helper(const Address dst); + -+ subw(xstart, xstart, 1); -+ bltz(xstart, L_one_x); ++ void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); ++ void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ ld(x_xstart, Address(t0, 0)); -+ ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian ++public: ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ Register tmp1, Register tmp2, Register tmp3, ++ int ae); + -+ bind(L_first_loop); -+ subw(idx, idx, 1); -+ bltz(idx, L_first_loop_exit); -+ subw(idx, idx, 1); -+ bltz(idx, L_one_y); ++ void string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL); + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(y_idx, Address(t0, 0)); -+ ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian -+ bind(L_multiply); ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL); + -+ mulhu(t0, x_xstart, y_idx); -+ mul(product, x_xstart, y_idx); -+ cad(product, product, carry, t1); -+ adc(carry, t0, zr, t1); ++ void string_indexof(Register str1, Register str2, ++ Register cnt1, Register cnt2, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae); + -+ subw(kdx, kdx, 2); -+ ror_imm(product, product, 32); // back to big-endian -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sd(product, Address(t0, 0)); ++ void string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae); + -+ j(L_first_loop); ++ void arrays_equals(Register r1, Register r2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, Register cnt1, ++ int elem_size); + -+ bind(L_one_y); -+ lwu(y_idx, Address(y, 0)); -+ j(L_multiply); ++ void string_equals(Register r1, Register r2, ++ Register result, Register cnt1, ++ int elem_size); + -+ bind(L_one_x); -+ lwu(x_xstart, Address(x, 0)); -+ j(L_first_loop); ++ // refer to conditional_branches and float_conditional_branches ++ static const int bool_test_bits = 3; ++ static const int neg_cond_bits = 2; ++ static const int unsigned_branch_mask = 1 << bool_test_bits; ++ static const int double_branch_mask = 1 << bool_test_bits; + -+ 
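The loop body above follows the jlong pseudocode in its comment: one 64-bit limb of x is multiplied against every limb of y while a 64-bit carry rides along, and the ror_imm(..., 32) calls only reorder the two 32-bit digits of each load and store. A minimal reference model in portable C++ (an editor's sketch, not code from the patch; unsigned __int128 stands in for the mul/mulhu pair, and the model assumes ready-made 64-bit limbs):

#include <cstdint>

// Sketch of the "first loop" of BigInteger.multiplyToLen over 64-bit limbs.
static void multiply_first_loop_ref(const uint64_t* x, int xstart,
                                    const uint64_t* y, int ystart,
                                    uint64_t* z) {
  uint64_t carry = 0;
  for (int idx = ystart, kdx = ystart + 1 + xstart; idx >= 0; idx--, kdx--) {
    unsigned __int128 product = (unsigned __int128)y[idx] * x[xstart] + carry;
    z[kdx] = (uint64_t)product;          // low 64 bits, what mul produces
    carry  = (uint64_t)(product >> 64);  // high 64 bits, what mulhu produces
  }
  z[xstart] = carry;
}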
bind(L_first_loop_exit); -+} ++ // cmp ++ void cmp_branch(int cmpFlag, ++ Register op1, Register op2, ++ Label& label, bool is_far = false); + -+/** -+ * Multiply 128 bit by 128 bit. Unrolled inner loop. -+ * -+ */ -+void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, -+ Register carry, Register carry2, -+ Register idx, Register jdx, -+ Register yz_idx1, Register yz_idx2, -+ Register tmp, Register tmp3, Register tmp4, -+ Register tmp6, Register product_hi) -+{ -+ // jlong carry, x[], y[], z[]; -+ // int kdx = xstart+1; -+ // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop -+ // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; -+ // jlong carry2 = (jlong)(tmp3 >>> 64); -+ // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; -+ // carry = (jlong)(tmp4 >>> 64); -+ // z[kdx+idx+1] = (jlong)tmp3; -+ // z[kdx+idx] = (jlong)tmp4; -+ // } -+ // idx += 2; -+ // if (idx > 0) { -+ // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; -+ // z[kdx+idx] = (jlong)yz_idx1; -+ // carry = (jlong)(yz_idx1 >>> 64); -+ // } -+ // ++ void float_cmp_branch(int cmpFlag, ++ FloatRegister op1, FloatRegister op2, ++ Label& label, bool is_far = false); + -+ Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; ++ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); + -+ srliw(jdx, idx, 2); ++ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); + -+ bind(L_third_loop); ++ void enc_cmove(int cmpFlag, ++ Register op1, Register op2, ++ Register dst, Register src); + -+ subw(jdx, jdx, 1); -+ bltz(jdx, L_third_loop_exit); -+ subw(idx, idx, 4); ++ void spill(Register r, bool is64, int offset) { ++ is64 ? sd(r, Address(sp, offset)) ++ : sw(r, Address(sp, offset)); ++ } + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(yz_idx2, Address(t0, 0)); -+ ld(yz_idx1, Address(t0, wordSize)); ++ void spill(FloatRegister f, bool is64, int offset) { ++ is64 ? fsd(f, Address(sp, offset)) ++ : fsw(f, Address(sp, offset)); ++ } + -+ shadd(tmp6, idx, z, t0, LogBytesPerInt); ++ void spill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vs1r_v(v, t0); ++ } + -+ ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian -+ ror_imm(yz_idx2, yz_idx2, 32); ++ void unspill(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lw(r, Address(sp, offset)); ++ } + -+ ld(t1, Address(tmp6, 0)); -+ ld(t0, Address(tmp6, wordSize)); ++ void unspillu(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lwu(r, Address(sp, offset)); ++ } + -+ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 -+ mulhu(tmp4, product_hi, yz_idx1); ++ void unspill(FloatRegister f, bool is64, int offset) { ++ is64 ? 
fld(f, Address(sp, offset)) ++ : flw(f, Address(sp, offset)); ++ } + -+ ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian -+ ror_imm(t1, t1, 32, tmp); ++ void unspill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vl1r_v(v, t0); ++ } + -+ mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp -+ mulhu(carry2, product_hi, yz_idx2); ++ void minmax_FD(FloatRegister dst, ++ FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min); + -+ cad(tmp3, tmp3, carry, carry); -+ adc(tmp4, tmp4, zr, carry); -+ cad(tmp3, tmp3, t0, t0); -+ cadc(tmp4, tmp4, tmp, t0); -+ adc(carry, carry2, zr, t0); -+ cad(tmp4, tmp4, t1, carry2); -+ adc(carry, carry, zr, carry2); ++}; + -+ ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian -+ ror_imm(tmp4, tmp4, 32); -+ sd(tmp4, Address(tmp6, 0)); -+ sd(tmp3, Address(tmp6, wordSize)); ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } ++#endif + -+ j(L_third_loop); ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++ private: ++ MacroAssembler* _masm; ++ Label _label; + -+ bind(L_third_loop_exit); ++ public: ++ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); ++ ~SkipIfEqual(); ++}; + -+ andi(idx, idx, 0x3); -+ beqz(idx, L_post_third_loop_done); ++#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp +new file mode 100644 +index 0000000000..ef968ccd96 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ Label L_check_1; -+ subw(idx, idx, 2); -+ bltz(idx, L_check_1); ++#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP ++#define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(yz_idx1, Address(t0, 0)); -+ ror_imm(yz_idx1, yz_idx1, 32); ++// Still empty. 
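The SkipIfEqual helper declared above is an RAII guard: code emitted between its construction and destruction is jumped over at run time when *flag_addr equals the given value. A usage sketch (editor's illustration, not from the patch; the surrounding routine and the DTraceMethodProbes flag are assumed for the example):

// Emit a probe that only runs when the DTraceMethodProbes flag is true.
void emit_guarded_probe(MacroAssembler* masm) {
  SkipIfEqual skip(masm, &DTraceMethodProbes, false);
  // ... instructions emitted here are skipped when DTraceMethodProbes == false ...
}  // ~SkipIfEqual binds the label the constructor branched to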
+ -+ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 -+ mulhu(tmp4, product_hi, yz_idx1); ++#endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +new file mode 100644 +index 0000000000..fd907f77af +--- /dev/null ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +@@ -0,0 +1,450 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ ld(yz_idx2, Address(t0, 0)); -+ ror_imm(yz_idx2, yz_idx2, 32, tmp); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/flags/flagSetting.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" + -+ add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); ++#define __ _masm-> + -+ ror_imm(tmp3, tmp3, 32, tmp); -+ sd(tmp3, Address(t0, 0)); ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif + -+ bind(L_check_1); ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + -+ andi(idx, idx, 0x1); -+ subw(idx, idx, 1); -+ bltz(idx, L_post_third_loop_done); -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ lwu(tmp4, Address(t0, 0)); -+ mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 -+ mulhu(carry2, tmp4, product_hi); ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ assert_cond(_masm != NULL); ++ if (VerifyMethodHandles) { ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ } ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++} + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ lwu(tmp4, Address(t0, 0)); ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //PRODUCT + -+ add2_with_carry(carry2, carry2, tmp3, tmp4, 
carry, t0); ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) { ++ assert_cond(_masm != NULL); ++ InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); ++ Klass* klass = SystemDictionary::well_known_klass(klass_id); ++ Register temp = t1; ++ Register temp2 = t0; // used by MacroAssembler::cmpptr ++ Label L_ok, L_bad; ++ BLOCK_COMMENT("verify_klass {"); ++ __ verify_oop(obj); ++ __ beqz(obj, L_bad); ++ __ push_reg(RegSet::of(temp, temp2), sp); ++ __ load_klass(temp, obj); ++ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); ++ intptr_t super_check_offset = klass->super_check_offset(); ++ __ ld(temp, Address(temp, super_check_offset)); ++ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); ++ __ pop_reg(RegSet::of(temp, temp2), sp); ++ __ bind(L_bad); ++ __ stop(error_message); ++ __ BIND(L_ok); ++ __ pop_reg(RegSet::of(temp, temp2), sp); ++ BLOCK_COMMENT("} verify_klass"); ++} + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ sw(tmp3, Address(t0, 0)); ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {} + -+ slli(t0, carry2, 32); -+ srli(carry, tmp3, 32); -+ orr(carry, carry, t0); ++#endif //ASSERT + -+ bind(L_post_third_loop_done); -+} ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert_cond(_masm != NULL); ++ assert(method == xmethod, "interpreter calling convention"); ++ Label L_no_such_method; ++ __ beqz(xmethod, L_no_such_method); ++ __ verify_method_ptr(method); + -+/** -+ * Code for BigInteger::multiplyToLen() intrinsic. -+ * -+ * x10: x -+ * x11: xlen -+ * x12: y -+ * x13: ylen -+ * x14: z -+ * x15: zlen -+ * x16: tmp1 -+ * x17: tmp2 -+ * x7: tmp3 -+ * x28: tmp4 -+ * x29: tmp5 -+ * x30: tmp6 -+ * x31: tmp7 -+ */ -+void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, -+ Register z, Register zlen, -+ Register tmp1, Register tmp2, Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, Register product_hi) -+{ -+ assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. + -+ const Register idx = tmp1; -+ const Register kdx = tmp2; -+ const Register xstart = tmp3; ++ __ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); ++ __ beqz(t0, run_compiled_code); ++ __ ld(t0, Address(method, Method::interpreter_entry_offset())); ++ __ jr(t0); ++ __ BIND(run_compiled_code); ++ } + -+ const Register y_idx = tmp4; -+ const Register carry = tmp5; -+ const Register product = xlen; -+ const Register x_xstart = zlen; // reuse register ++ const ByteSize entry_offset = for_compiler_entry ? 
Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld(t0,Address(method, entry_offset)); ++ __ jr(t0); ++ __ bind(L_no_such_method); ++ __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry())); ++} + -+ mv(idx, ylen); // idx = ylen; -+ mv(kdx, zlen); // kdx = xlen+ylen; -+ mv(carry, zr); // carry = 0; ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ assert_cond(_masm != NULL); ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == xmethod, "required register for loading method"); + -+ Label L_multiply_64_x_64_loop, L_done; ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); + -+ subw(xstart, xlen, 1); -+ bltz(xstart, L_done); ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), /*is_signed*/ false); ++ Label L; ++ __ ld(t0, __ argument_address(temp2, -1)); ++ __ beq(recv, t0, L); ++ __ ld(x10, __ argument_address(temp2, -1)); ++ __ ebreak(); ++ __ BIND(L); ++ } + -+ const Register jdx = tmp1; ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} + -+ if (AvoidUnalignedAccesses) { -+ // Check if x and y are both 8-byte aligned. -+ orr(t0, xlen, ylen); -+ andi(t0, t0, 0x1); -+ beqz(t0, L_multiply_64_x_64_loop); ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ assert_cond(_masm != NULL); ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. 
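jump_to_lambda_form above resolves the invoker by chasing four heap references before it reaches a Method*. A hypothetical C++ model of that chain (editor's sketch; the struct and field names only mirror the java.lang.invoke objects named in the loads, they are not HotSpot types):

struct Method;                                        // the Method* finally jumped to
struct ResolvedMethodName { Method* vmtarget; };
struct MemberName         { ResolvedMethodName* method; };
struct LambdaForm         { MemberName* vmentry; };
struct MethodHandle       { LambdaForm* form; };

// MH -> MH.form -> LF.vmentry -> MemberName.method -> vmtarget
static Method* resolve_invoker(const MethodHandle* mh) {
  return mh->form->vmentry->method->vmtarget;
}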
++ __ ebreak(); // empty stubs make SG sick ++ return NULL; ++ } + -+ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++ // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) ++ // xmethod: Method* ++ // x13: argument locator (parameter slot count, added to sp) ++ // x11: used as temp to hold mh or receiver ++ // x10, x29: garbage temps, blown away ++ Register argp = x13; // argument list ptr, live on error paths ++ Register mh = x11; // MH receiver; dies quickly and is recycled + -+ Label L_second_loop_unaligned; -+ bind(L_second_loop_unaligned); -+ mv(carry, zr); -+ mv(jdx, ylen); -+ subw(xstart, xstart, 1); -+ bltz(xstart, L_done); -+ sub(sp, sp, 2 * wordSize); -+ sd(z, Address(sp, 0)); -+ sd(zr, Address(sp, wordSize)); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ addi(z, t0, 4); -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ lwu(product, Address(t0, 0)); -+ Label L_third_loop, L_third_loop_exit; ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); + -+ blez(jdx, L_third_loop_exit); ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + -+ bind(L_third_loop); -+ subw(jdx, jdx, 1); -+ shadd(t0, jdx, y, t0, LogBytesPerInt); -+ lwu(t0, Address(t0, 0)); -+ mul(t1, t0, product); -+ add(t0, t1, carry); -+ shadd(tmp6, jdx, z, t1, LogBytesPerInt); -+ lwu(t1, Address(tmp6, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(tmp6, 0)); -+ srli(carry, t0, 32); -+ bgtz(jdx, L_third_loop); ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ lhu(t0, Address(xmethod, Method::intrinsic_id_offset_in_bytes())); ++ __ mv(t1, (int) iid); ++ __ beq(t0, t1, L); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ ebreak(); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } + -+ bind(L_third_loop_exit); -+ ld(z, Address(sp, 0)); -+ addi(sp, sp, 2 * wordSize); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++ // First task: Find out how big the argument list is. ++ Address x13_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld(argp, Address(xmethod, Method::const_offset())); ++ __ load_sized_value(argp, ++ Address(argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), /*is_signed*/ false); ++ x13_first_arg_addr = __ argument_address(argp, -1); ++ } else { ++ DEBUG_ONLY(argp = noreg); ++ } + -+ j(L_second_loop_unaligned); ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld(mh, x13_first_arg_addr); ++ DEBUG_ONLY(argp = noreg); + } + -+ bind(L_multiply_64_x_64_loop); -+ multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); ++ // x13_first_arg_addr is live! + -+ Label L_second_loop_aligned; -+ beqz(kdx, L_second_loop_aligned); ++ trace_method_handle_interpreter_entry(_masm, iid); ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. 
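In the multiply_to_len body being dropped above, the AvoidUnalignedAccesses guard takes the 8-byte-per-iteration loop only when both input lengths are even, which it tests with a single or/and pair. The same test in C++ (editor's sketch):

#include <cstdint>

// (xlen | ylen) has bit 0 set iff at least one length is odd, so this mirrors
// the orr + andi + beqz sequence that branches to L_multiply_64_x_64_loop.
static bool both_lengths_even(int64_t xlen, int64_t ylen) {
  return ((xlen | ylen) & 0x1) == 0;
}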
++ Register recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. ++ __ ld(recv = x12, x13_first_arg_addr); ++ } ++ DEBUG_ONLY(argp = noreg); ++ Register xmember = xmethod; // MemberName ptr; incoming method ptr is dead now ++ __ pop_reg(xmember); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, recv, xmember, not_for_compiler_entry); ++ } + -+ Label L_carry; -+ subw(kdx, kdx, 1); -+ beqz(kdx, L_carry); ++ return entry_point; ++} + -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); -+ srli(carry, carry, 32); -+ subw(kdx, kdx, 1); + -+ bind(L_carry); -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert_cond(_masm != NULL); ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register temp1 = x7; ++ Register temp2 = x28; ++ Register temp3 = x29; // x30 is live by this point: it contains the sender SP ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++ } + -+ // Second and third (nested) loops. -+ // -+ // for (int i = xstart-1; i >= 0; i--) { // Second loop -+ // carry = 0; -+ // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop -+ // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + -+ // (z[k] & LONG_MASK) + carry; -+ // z[k] = (int)product; -+ // carry = product >>> 32; -+ // } -+ // z[i] = (int)carry; -+ // } -+ // -+ // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); + -+ bind(L_second_loop_aligned); -+ mv(carry, zr); // carry = 0; -+ mv(jdx, ylen); // j = ystart+1 ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); ++ } else { ++ // The method is a member invoker used by direct method handles. 
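Back in the removed multiply_to_len body, the second and third (nested) loops follow the Java pseudocode quoted above. A reference model over 32-bit digits (editor's sketch, not code from the patch; the assembly accumulates the same values but walks 64 and 128 bits at a time):

#include <cstdint>

static void multiply_remaining_ref(const uint32_t* x, int xstart,
                                   const uint32_t* y, int ystart,
                                   uint32_t* z) {
  for (int i = xstart - 1; i >= 0; i--) {                              // second loop
    uint64_t carry = 0;
    for (int jdx = ystart, k = ystart + 1 + i; jdx >= 0; jdx--, k--) { // third loop
      uint64_t product = (uint64_t)y[jdx] * x[i] + z[k] + carry;
      z[k]  = (uint32_t)product;   // low 32 bits back into z
      carry = product >> 32;       // high 32 bits carried to the next digit
    }
    z[i] = (uint32_t)carry;
  }
}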
++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } + -+ subw(xstart, xstart, 1); // i = xstart-1; -+ bltz(xstart, L_done); ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); ++ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); + -+ sub(sp, sp, 4 * wordSize); -+ sd(z, Address(sp, 0)); ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ ebreak(); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } + -+ Label L_last_x; -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ addi(z, t0, 4); -+ subw(xstart, xstart, 1); // i = xstart-1; -+ bltz(xstart, L_last_x); ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ // x30 - interpreter linkage (if interpreted) ++ // x11 ... 
x10 - compiler arguments (if compiled) + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ ld(product_hi, Address(t0, 0)); -+ ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(xmethod, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); ++ break; + -+ Label L_third_loop_prologue; -+ bind(L_third_loop_prologue); ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(xmethod, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); ++ break; + -+ sd(ylen, Address(sp, wordSize)); -+ sd(x, Address(sp, 2 * wordSize)); -+ sd(xstart, Address(sp, 3 * wordSize)); -+ multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, -+ tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); -+ ld(z, Address(sp, 0)); -+ ld(ylen, Address(sp, wordSize)); -+ ld(x, Address(sp, 2 * wordSize)); -+ ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen -+ addi(sp, sp, 4 * wordSize); ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: + -+ addiw(tmp3, xlen, 1); -+ shadd(t0, tmp3, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } + -+ subw(tmp3, tmp3, 1); -+ bltz(tmp3, L_done); ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); + -+ srli(carry, carry, 32); -+ shadd(t0, tmp3, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); -+ j(L_second_loop_aligned); ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ bgez(temp2_index, L_index_ok); ++ __ ebreak(); ++ __ BIND(L_index_ok); ++ } + -+ // Next infrequent code is moved outside loops. -+ bind(L_last_x); -+ lwu(product_hi, Address(x, 0)); -+ j(L_third_loop_prologue); ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. + -+ bind(L_done); -+} -+#endif ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, xmethod); ++ break; ++ } + -+// Count bits of trailing zero chars from lsb to msb until first non-zero element. -+// For LL case, one byte for one element, so shift 8 bits once, and for other case, -+// shift 16 bits once. -+void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) -+{ -+ if (UseRVB) { -+ assert_different_registers(Rd, Rs, tmp1); -+ int step = isLL ? 8 : 16; -+ ctz(Rd, Rs); -+ andi(tmp1, Rd, step - 1); -+ sub(Rd, Rd, tmp1); -+ return; -+ } -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ Label Loop; -+ int step = isLL ? 
8 : 16; -+ li(Rd, -step); -+ mv(tmp2, Rs); ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } + -+ bind(Loop); -+ addi(Rd, Rd, step); -+ andi(tmp1, tmp2, ((1 << step) - 1)); -+ srli(tmp2, tmp2, step); -+ beqz(tmp1, Loop); -+} ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); + -+// This instruction reads adjacent 4 bytes from the lower half of source register, -+// inflate into a register, for example: -+// Rs: A7A6A5A4A3A2A1A0 -+// Rd: 00A300A200A100A0 -+void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) -+{ -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ li(tmp1, 0xFF); -+ mv(Rd, zr); -+ for (int i = 0; i <= 3; i++) -+ { -+ andr(tmp2, Rs, tmp1); -+ if (i) { -+ slli(tmp2, tmp2, i * 8); -+ } -+ orr(Rd, Rd, tmp2); -+ if (i != 3) { -+ slli(tmp1, tmp1, 8); ++ Register rindex = xmethod; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rindex, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ bgez(rindex, L); ++ __ ebreak(); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rindex, xmethod, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); ++ break; + } -+ } -+} + -+// This instruction reads adjacent 4 bytes from the upper half of source register, -+// inflate into a register, for example: -+// Rs: A7A6A5A4A3A2A1A0 -+// Rd: 00A700A600A500A4 -+void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) -+{ -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ li(tmp1, 0xFF00000000); -+ mv(Rd, zr); -+ for (int i = 0; i <= 3; i++) -+ { -+ andr(tmp2, Rs, tmp1); -+ orr(Rd, Rd, tmp2); -+ srli(Rd, Rd, 8); -+ if (i != 3) { -+ slli(tmp1, tmp1, 8); ++ // live at this point: xmethod, x30 (if interpreted) ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r2_recv be shifted out. ++ __ verify_method_ptr(xmethod); ++ jump_from_method_handle(_masm, xmethod, temp1, for_compiler_entry); ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); + } + } ++ +} + -+// The size of the blocks erased by the zero_blocks stub. We must -+// handle anything smaller than this ourselves in zero_words(). -+const int MacroAssembler::zero_words_block_size = 8; ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oopDesc* mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { } + -+// zero_words() is used by C2 ClearArray patterns. It is as small as -+// possible, handling small word counts locally and delegating -+// anything larger to the zero_blocks stub. It is expanded many times -+// in compiled code, so it is important to keep it short. 
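inflate_lo32 and inflate_hi32 above widen four packed bytes into four 16-bit lanes, the Latin-1 to UTF-16 inflation used by the string intrinsics; the A7..A0 examples in their comments correspond to this reference model (editor's sketch, not code from the patch):

#include <cstdint>

static uint64_t inflate_lo32_ref(uint64_t rs) {   // ....A3A2A1A0 -> 00A3 00A2 00A1 00A0
  uint64_t rd = 0;
  for (int i = 0; i < 4; i++) {
    rd |= ((rs >> (8 * i)) & 0xFF) << (16 * i);
  }
  return rd;
}

static uint64_t inflate_hi32_ref(uint64_t rs) {   // A7A6A5A4.... -> 00A7 00A6 00A5 00A4
  return inflate_lo32_ref(rs >> 32);
}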
++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { } + -+// ptr: Address of a buffer to be zeroed. -+// cnt: Count in HeapWords. -+// -+// ptr, cnt, and t0 are clobbered. -+address MacroAssembler::zero_words(Register ptr, Register cnt) -+{ -+ assert(is_power_of_2(zero_words_block_size), "adjust this"); -+ assert(ptr == x28 && cnt == x29, "mismatch in register usage"); -+ assert_different_registers(cnt, t0); -+ -+ BLOCK_COMMENT("zero_words {"); -+ mv(t0, zero_words_block_size); -+ Label around, done, done16; -+ bltu(cnt, t0, around); -+ { -+ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); -+ assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); -+ if (StubRoutines::riscv::complete()) { -+ address tpc = trampoline_call(zero_blocks); -+ if (tpc == NULL) { -+ DEBUG_ONLY(reset_labels(around)); -+ postcond(pc() == badAddress); -+ return NULL; -+ } -+ } else { -+ jal(zero_blocks); -+ } -+ } -+ bind(around); -+ for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { -+ Label l; -+ andi(t0, cnt, i); -+ beqz(t0, l); -+ for (int j = 0; j < i; j++) { -+ sd(zr, Address(ptr, 0)); -+ addi(ptr, ptr, 8); -+ } -+ bind(l); -+ } -+ { -+ Label l; -+ andi(t0, cnt, 1); -+ beqz(t0, l); -+ sd(zr, Address(ptr, 0)); -+ bind(l); -+ } -+ BLOCK_COMMENT("} zero_words"); -+ postcond(pc() != badAddress); -+ return pc(); -+} -+ -+#define SmallArraySize (18 * BytesPerLong) -+ -+// base: Address of a buffer to be zeroed, 8 bytes aligned. -+// cnt: Immediate count in HeapWords. -+void MacroAssembler::zero_words(Register base, u_int64_t cnt) -+{ -+ assert_different_registers(base, t0, t1); -+ -+ BLOCK_COMMENT("zero_words {"); -+ -+ if (cnt <= SmallArraySize / BytesPerLong) { -+ for (int i = 0; i < (int)cnt; i++) { -+ sd(zr, Address(base, i * wordSize)); -+ } -+ } else { -+ const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll -+ int remainder = cnt % unroll; -+ for (int i = 0; i < remainder; i++) { -+ sd(zr, Address(base, i * wordSize)); -+ } -+ -+ Label loop; -+ Register cnt_reg = t0; -+ Register loop_base = t1; -+ cnt = cnt - remainder; -+ li(cnt_reg, cnt); -+ add(loop_base, base, remainder * wordSize); -+ bind(loop); -+ sub(cnt_reg, cnt_reg, unroll); -+ for (int i = 0; i < unroll; i++) { -+ sd(zr, Address(loop_base, i * wordSize)); -+ } -+ add(loop_base, loop_base, unroll * wordSize); -+ bnez(cnt_reg, loop); -+ } -+ -+ BLOCK_COMMENT("} zero_words"); -+} -+ -+// base: Address of a buffer to be filled, 8 bytes aligned. -+// cnt: Count in 8-byte unit. -+// value: Value to be filled with. -+// base will point to the end of the buffer after filling. -+void MacroAssembler::fill_words(Register base, Register cnt, Register value) -+{ -+// Algorithm: -+// -+// t0 = cnt & 7 -+// cnt -= t0 -+// p += t0 -+// switch (t0): -+// switch start: -+// do while cnt -+// cnt -= 8 -+// p[-8] = value -+// case 7: -+// p[-7] = value -+// case 6: -+// p[-6] = value -+// // ... 
-+// case 1: -+// p[-1] = value -+// case 0: -+// p += 8 -+// do-while end -+// switch end -+ -+ assert_different_registers(base, cnt, value, t0, t1); -+ -+ Label fini, skip, entry, loop; -+ const int unroll = 8; // Number of sd instructions we'll unroll -+ -+ beqz(cnt, fini); -+ -+ andi(t0, cnt, unroll - 1); -+ sub(cnt, cnt, t0); -+ // align 8, so first sd n % 8 = mod, next loop sd 8 * n. -+ shadd(base, t0, base, t1, 3); -+ la(t1, entry); -+ slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) -+ sub(t1, t1, t0); -+ jr(t1); -+ -+ bind(loop); -+ add(base, base, unroll * 8); -+ for (int i = -unroll; i < 0; i++) { -+ sd(value, Address(base, i * 8)); -+ } -+ bind(entry); -+ sub(cnt, cnt, unroll); -+ bgez(cnt, loop); -+ -+ bind(fini); -+} -+ -+#define FCVT_SAFE(FLOATCVT, FLOATEQ) \ -+void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ -+ Label L_Okay; \ -+ fscsr(zr); \ -+ FLOATCVT(dst, src); \ -+ frcsr(tmp); \ -+ andi(tmp, tmp, 0x1E); \ -+ beqz(tmp, L_Okay); \ -+ FLOATEQ(tmp, src, src); \ -+ bnez(tmp, L_Okay); \ -+ mv(dst, zr); \ -+ bind(L_Okay); \ -+} -+ -+FCVT_SAFE(fcvt_w_s, feq_s) -+FCVT_SAFE(fcvt_l_s, feq_s) -+FCVT_SAFE(fcvt_w_d, feq_d) -+FCVT_SAFE(fcvt_l_d, feq_d) -+ -+#undef FCVT_SAFE -+ -+#define FCMP(FLOATTYPE, FLOATSIG) \ -+void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ -+ FloatRegister Rs2, int unordered_result) { \ -+ Label Ldone; \ -+ if (unordered_result < 0) { \ -+ /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ -+ /* installs 1 if gt else 0 */ \ -+ flt_##FLOATSIG(result, Rs2, Rs1); \ -+ /* Rs1 > Rs2, install 1 */ \ -+ bgtz(result, Ldone); \ -+ feq_##FLOATSIG(result, Rs1, Rs2); \ -+ addi(result, result, -1); \ -+ /* Rs1 = Rs2, install 0 */ \ -+ /* NaN or Rs1 < Rs2, install -1 */ \ -+ bind(Ldone); \ -+ } else { \ -+ /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ -+ /* installs 1 if gt or unordered else 0 */ \ -+ flt_##FLOATSIG(result, Rs1, Rs2); \ -+ /* Rs1 < Rs2, install -1 */ \ -+ bgtz(result, Ldone); \ -+ feq_##FLOATSIG(result, Rs1, Rs2); \ -+ addi(result, result, -1); \ -+ /* Rs1 = Rs2, install 0 */ \ -+ /* NaN or Rs1 > Rs2, install 1 */ \ -+ bind(Ldone); \ -+ neg(result, result); \ -+ } \ -+} -+ -+FCMP(float, s); -+FCMP(double, d); -+ -+#undef FCMP -+ -+// Zero words; len is in bytes -+// Destroys all registers except addr -+// len must be a nonzero multiple of wordSize -+void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { -+ assert_different_registers(addr, len, tmp, t0, t1); -+ -+#ifdef ASSERT -+ { -+ Label L; -+ andi(t0, len, BytesPerWord - 1); -+ beqz(t0, L); -+ stop("len is not a multiple of BytesPerWord"); -+ bind(L); -+ } -+#endif // ASSERT -+ -+#ifndef PRODUCT -+ block_comment("zero memory"); -+#endif // PRODUCT -+ -+ Label loop; -+ Label entry; -+ -+ // Algorithm: -+ // -+ // t0 = cnt & 7 -+ // cnt -= t0 -+ // p += t0 -+ // switch (t0) { -+ // do { -+ // cnt -= 8 -+ // p[-8] = 0 -+ // case 7: -+ // p[-7] = 0 -+ // case 6: -+ // p[-6] = 0 -+ // ... 
-+ // case 1: -+ // p[-1] = 0 -+ // case 0: -+ // p += 8 -+ // } while (cnt) -+ // } -+ -+ const int unroll = 8; // Number of sd(zr) instructions we'll unroll -+ -+ srli(len, len, LogBytesPerWord); -+ andi(t0, len, unroll - 1); // t0 = cnt % unroll -+ sub(len, len, t0); // cnt -= unroll -+ // tmp always points to the end of the region we're about to zero -+ shadd(tmp, t0, addr, t1, LogBytesPerWord); -+ la(t1, entry); -+ slli(t0, t0, 2); -+ sub(t1, t1, t0); -+ jr(t1); -+ bind(loop); -+ sub(len, len, unroll); -+ for (int i = -unroll; i < 0; i++) { -+ Assembler::sd(zr, Address(tmp, i * wordSize)); -+ } -+ bind(entry); -+ add(tmp, tmp, unroll * wordSize); -+ bnez(len, loop); -+} -+ -+// shift left by shamt and add -+// Rd = (Rs1 << shamt) + Rs2 -+void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { -+ if (UseRVB) { -+ if (shamt == 1) { -+ sh1add(Rd, Rs1, Rs2); -+ return; -+ } else if (shamt == 2) { -+ sh2add(Rd, Rs1, Rs2); -+ return; -+ } else if (shamt == 3) { -+ sh3add(Rd, Rs1, Rs2); -+ return; -+ } -+ } ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { } ++#endif //PRODUCT +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +new file mode 100644 +index 0000000000..65493eba76 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ if (shamt != 0) { -+ slli(tmp, Rs1, shamt); -+ add(Rd, Rs2, tmp); -+ } else { -+ add(Rd, Rs1, Rs2); -+ } -+} ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. 
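The FCVT_SAFE and FCMP macros defined a little above encode the Java-visible rules for float conversion and comparison: converting NaN must give 0 (the saturating fcvt hardware result is kept for everything else), and an unordered comparison collapses onto the sign chosen by unordered_result. A behavioural model (editor's sketch, not code from the patch):

#include <cmath>
#include <cstdint>
#include <limits>

static int64_t fcvt_l_d_safe_ref(double src) {
  if (std::isnan(src)) return 0;                    // NaN converts to 0
  if (src >= (double)std::numeric_limits<int64_t>::max()) return std::numeric_limits<int64_t>::max();
  if (src <= (double)std::numeric_limits<int64_t>::min()) return std::numeric_limits<int64_t>::min();
  return (int64_t)src;                              // in-range values convert directly
}

static int double_compare_ref(double a, double b, int unordered_result) {
  if (std::isnan(a) || std::isnan(b)) return unordered_result < 0 ? -1 : 1;
  return a < b ? -1 : (a > b ? 1 : 0);
}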
+ -+void MacroAssembler::zero_extend(Register dst, Register src, int bits) { -+ if (UseRVB) { -+ if (bits == 16) { -+ zext_h(dst, src); -+ return; -+ } else if (bits == 32) { -+ zext_w(dst, src); -+ return; -+ } -+ } ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 120000) ++}; + -+ if (bits == 8) { -+ zext_b(dst, src); -+ } else { -+ slli(dst, src, XLEN - bits); -+ srli(dst, dst, XLEN - bits); -+ } -+} ++public: + -+void MacroAssembler::sign_extend(Register dst, Register src, int bits) { -+ if (UseRVB) { -+ if (bits == 8) { -+ sext_b(dst, src); -+ return; -+ } else if (bits == 16) { -+ sext_h(dst, src); -+ return; -+ } -+ } ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + -+ if (bits == 32) { -+ sext_w(dst, src); -+ } else { -+ slli(dst, src, XLEN - bits); -+ srai(dst, dst, XLEN - bits); -+ } -+} ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + -+void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) -+{ -+ if (src1 == src2) { -+ mv(dst, zr); -+ return; -+ } -+ Label done; -+ Register left = src1; -+ Register right = src2; -+ if (dst == src1) { -+ assert_different_registers(dst, src2, tmp); -+ mv(tmp, src1); -+ left = tmp; -+ } else if (dst == src2) { -+ assert_different_registers(dst, src1, tmp); -+ mv(tmp, src2); -+ right = tmp; ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); + } + -+ // installs 1 if gt else 0 -+ slt(dst, right, left); -+ bnez(dst, done); -+ slt(dst, left, right); -+ // dst = -1 if lt; else if eq , dst = 0 -+ neg(dst, dst); -+ bind(done); -+} ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; + -+void MacroAssembler::safepoint_ifence() { -+ ifence(); -+#ifndef PRODUCT -+ if (VerifyCrossModifyFence) { -+ // Clear the thread state. -+ sb(zr, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); -+ } -+#endif -+} ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); + -+#ifndef PRODUCT -+void MacroAssembler::verify_cross_modify_fence_not_required() { -+ if (VerifyCrossModifyFence) { -+ // Check if thread needs a cross modify fence. -+ lbu(t0, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); -+ Label fence_not_required; -+ beqz(t0, fence_not_required); -+ // If it does then fail. 
-+ la(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure))); -+ mv(c_rarg0, xthread); -+ jalr(t0); -+ bind(fence_not_required); -+ } -+} -+#endif -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp new file mode 100644 -index 00000000000..23e09475be1 +index 0000000000..27011ad128 --- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -0,0 +1,858 @@ ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +@@ -0,0 +1,417 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. @@ -26571,1058 +25813,1014 @@ index 00000000000..23e09475be1 + * + */ + -+#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP -+#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/compiledIC.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/orderAccess.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" ++#endif + -+#include "asm/assembler.hpp" -+#include "metaprogramming/enableIf.hpp" -+#include "oops/compressedOops.hpp" -+#include "utilities/powerOfTwo.hpp" ++Register NativeInstruction::extract_rs1(address instr) { ++ assert_cond(instr != NULL); ++ return as_Register(Assembler::extract(((unsigned*)instr)[0], 19, 15)); ++} + -+// MacroAssembler extends Assembler by frequently used macros. -+// -+// Instructions for which a 'better' code sequence exists depending -+// on arguments should also go in here. ++Register NativeInstruction::extract_rs2(address instr) { ++ assert_cond(instr != NULL); ++ return as_Register(Assembler::extract(((unsigned*)instr)[0], 24, 20)); ++} + -+class MacroAssembler: public Assembler { ++Register NativeInstruction::extract_rd(address instr) { ++ assert_cond(instr != NULL); ++ return as_Register(Assembler::extract(((unsigned*)instr)[0], 11, 7)); ++} + -+ public: -+ MacroAssembler(CodeBuffer* code) : Assembler(code) { -+ } -+ virtual ~MacroAssembler() {} ++uint32_t NativeInstruction::extract_opcode(address instr) { ++ assert_cond(instr != NULL); ++ return Assembler::extract(((unsigned*)instr)[0], 6, 0); ++} + -+ void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod); ++uint32_t NativeInstruction::extract_funct3(address instr) { ++ assert_cond(instr != NULL); ++ return Assembler::extract(((unsigned*)instr)[0], 14, 12); ++} + -+ // Place a fence.i after code may have been modified due to a safepoint. 
-+ void safepoint_ifence(); ++bool NativeInstruction::is_pc_relative_at(address instr) { ++ // auipc + jalr ++ // auipc + addi ++ // auipc + load ++ // auipc + fload_load ++ return (is_auipc_at(instr)) && ++ (is_addi_at(instr + instruction_size) || ++ is_jalr_at(instr + instruction_size) || ++ is_load_at(instr + instruction_size) || ++ is_float_load_at(instr + instruction_size)) && ++ check_pc_relative_data_dependency(instr); ++} + -+ // Alignment -+ void align(int modulus, int extra_offset = 0); ++// ie:ld(Rd, Label) ++bool NativeInstruction::is_load_pc_relative_at(address instr) { ++ return is_auipc_at(instr) && // auipc ++ is_ld_at(instr + instruction_size) && // ld ++ check_load_pc_relative_data_dependency(instr); ++} + -+ // Stack frame creation/removal -+ // Note that SP must be updated to the right place before saving/restoring RA and FP -+ // because signal based thread suspend/resume could happen asynchronously. -+ void enter() { -+ addi(sp, sp, - 2 * wordSize); -+ sd(ra, Address(sp, wordSize)); -+ sd(fp, Address(sp)); -+ addi(fp, sp, 2 * wordSize); -+ } ++bool NativeInstruction::is_movptr_at(address instr) { ++ return is_lui_at(instr) && // Lui ++ is_addi_at(instr + instruction_size) && // Addi ++ is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 ++ is_addi_at(instr + instruction_size * 3) && // Addi ++ is_slli_shift_at(instr + instruction_size * 4, 6) && // Slli Rd, Rs, 6 ++ (is_addi_at(instr + instruction_size * 5) || ++ is_jalr_at(instr + instruction_size * 5) || ++ is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load ++ check_movptr_data_dependency(instr); ++} + -+ void leave() { -+ addi(sp, fp, - 2 * wordSize); -+ ld(fp, Address(sp)); -+ ld(ra, Address(sp, wordSize)); -+ addi(sp, sp, 2 * wordSize); -+ } ++bool NativeInstruction::is_li32_at(address instr) { ++ return is_lui_at(instr) && // lui ++ is_addiw_at(instr + instruction_size) && // addiw ++ check_li32_data_dependency(instr); ++} + ++bool NativeInstruction::is_li64_at(address instr) { ++ return is_lui_at(instr) && // lui ++ is_addi_at(instr + instruction_size) && // addi ++ is_slli_shift_at(instr + instruction_size * 2, 12) && // Slli Rd, Rs, 12 ++ is_addi_at(instr + instruction_size * 3) && // addi ++ is_slli_shift_at(instr + instruction_size * 4, 12) && // Slli Rd, Rs, 12 ++ is_addi_at(instr + instruction_size * 5) && // addi ++ is_slli_shift_at(instr + instruction_size * 6, 8) && // Slli Rd, Rs, 8 ++ is_addi_at(instr + instruction_size * 7) && // addi ++ check_li64_data_dependency(instr); ++} + -+ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) -+ // The pointer will be loaded into the thread register. -+ void get_thread(Register thread); ++void NativeCall::verify() { ++ assert(NativeCall::is_call_at((address)this), "unexpected code at call site"); ++} + -+ // Support for VM calls -+ // -+ // It is imperative that all calls into the VM are handled via the call_VM macros. -+ // They make sure that the stack linkage is setup correctly. call_VM's correspond -+ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
++address NativeCall::destination() const { ++ address addr = (address)this; ++ assert(NativeInstruction::is_jal_at(instruction_address()), "inst must be jal."); ++ address destination = MacroAssembler::target_addr_for_insn(instruction_address()); + -+ void call_VM(Register oop_result, -+ address entry_point, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, Register arg_2, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, Register arg_2, Register arg_3, -+ bool check_exceptions = true); ++ // Do we use a trampoline stub for this call? ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ if (nm != NULL && nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } + -+ // Overloadings with last_Java_sp -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments = 0, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, Register arg_2, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, Register arg_2, Register arg_3, -+ bool check_exceptions = true); ++ return destination; ++} + -+ void get_vm_result(Register oop_result, Register java_thread); -+ void get_vm_result_2(Register metadata_result, Register java_thread); ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. 
++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ "concurrent code patching"); + -+ // These always tightly bind to MacroAssembler::call_VM_leaf_base -+ // bypassing the virtual implementation -+ void call_VM_leaf(address entry_point, -+ int number_of_arguments = 0); -+ void call_VM_leaf(address entry_point, -+ Register arg_0); -+ void call_VM_leaf(address entry_point, -+ Register arg_0, Register arg_1); -+ void call_VM_leaf(address entry_point, -+ Register arg_0, Register arg_1, Register arg_2); ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); + -+ // These always tightly bind to MacroAssembler::call_VM_base -+ // bypassing the virtual implementation -+ void super_call_VM_leaf(address entry_point, Register arg_0); -+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1); -+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2); -+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3); ++ // Patch the constant in the call's trampoline stub. ++ address trampoline_stub_addr = get_trampoline(); ++ if (trampoline_stub_addr != NULL) { ++ assert (!is_NativeCallTrampolineStub_at(dest), "chained trampolines"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ } + -+ // last Java Frame (fills frame anchor) -+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp); -+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp); -+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp); ++ // Patch the call. 
++ if (Assembler::reachable_from_branch_at(addr_call, dest)) { ++ set_destination(dest); ++ } else { ++ assert (trampoline_stub_addr != NULL, "we need a trampoline"); ++ set_destination(trampoline_stub_addr); ++ } + -+ // thread in the default location (xthread) -+ void reset_last_Java_frame(bool clear_fp); ++ ICache::invalidate_range(addr_call, instruction_size); ++} + -+ void call_native(address entry_point, -+ Register arg_0); -+ void call_native_base( -+ address entry_point, // the entry point -+ Label* retaddr = NULL -+ ); ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); + -+ virtual void call_VM_leaf_base( -+ address entry_point, // the entry point -+ int number_of_arguments, // the number of arguments to pop after the call -+ Label* retaddr = NULL -+ ); ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); + -+ virtual void call_VM_leaf_base( -+ address entry_point, // the entry point -+ int number_of_arguments, // the number of arguments to pop after the call -+ Label& retaddr) { -+ call_VM_leaf_base(entry_point, number_of_arguments, &retaddr); ++ address jal_destination = MacroAssembler::pd_call_destination(call_addr); ++ if (code != NULL && code->contains(jal_destination) && is_NativeCallTrampolineStub_at(jal_destination)) { ++ return jal_destination; + } + -+ virtual void call_VM_base( // returns the register containing the thread upon return -+ Register oop_result, // where an oop-result ends up if any; use noreg otherwise -+ Register java_thread, // the thread if computed before ; use noreg otherwise -+ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise -+ address entry_point, // the entry point -+ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call -+ bool check_exceptions // whether to check for pending exceptions after return -+ ); ++ if (code != NULL && code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } + -+ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions); ++ return NULL; ++} + -+ virtual void check_and_handle_earlyret(Register java_thread); -+ virtual void check_and_handle_popframe(Register java_thread); ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { Unimplemented(); } + -+ void resolve_weak_handle(Register result, Register tmp); -+ void resolve_oop_handle(Register result, Register tmp = x15); -+ void resolve_jobject(Register value, Register thread, Register tmp); ++//------------------------------------------------------------------- + -+ void movoop(Register dst, jobject obj, bool immediate = false); -+ void mov_metadata(Register dst, Metadata* obj); -+ void bang_stack_size(Register size, Register tmp); -+ void set_narrow_oop(Register dst, jobject obj); -+ void set_narrow_klass(Register dst, Klass* k); ++void NativeMovConstReg::verify() { ++ if (!(nativeInstruction_at(instruction_address())->is_movptr() || ++ is_auipc_at(instruction_address()))) { ++ fatal("should be MOVPTR or AUIPC"); ++ } ++} + -+ void load_mirror(Register dst, Register method, Register tmp = x15); -+ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, -+ Address src, Register tmp1, Register thread_tmp); -+ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, -+ Register src, Register tmp1, Register thread_tmp); 
-+ void load_klass(Register dst, Register src); -+ void store_klass(Register dst, Register src); -+ void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L); ++intptr_t NativeMovConstReg::data() const { ++ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); ++ if (maybe_cpool_ref(instruction_address())) { ++ return *(intptr_t*)addr; ++ } else { ++ return (intptr_t)addr; ++ } ++} + -+ void encode_klass_not_null(Register r); -+ void decode_klass_not_null(Register r); -+ void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); -+ void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); -+ void decode_heap_oop_not_null(Register r); -+ void decode_heap_oop_not_null(Register dst, Register src); -+ void decode_heap_oop(Register d, Register s); -+ void decode_heap_oop(Register r) { decode_heap_oop(r, r); } -+ void encode_heap_oop(Register d, Register s); -+ void encode_heap_oop(Register r) { encode_heap_oop(r, r); }; -+ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, -+ Register thread_tmp = noreg, DecoratorSet decorators = 0); -+ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, -+ Register thread_tmp = noreg, DecoratorSet decorators = 0); -+ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, -+ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++void NativeMovConstReg::set_data(intptr_t x) { ++ if (maybe_cpool_ref(instruction_address())) { ++ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); ++ *(intptr_t*)addr = x; ++ } else { ++ // Store x into the instruction stream. ++ MacroAssembler::pd_patch_instruction_size(instruction_address(), (address)x); ++ ICache::invalidate_range(instruction_address(), movptr_instruction_size); ++ } + -+ void store_klass_gap(Register dst, Register src); ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* cb = CodeCache::find_blob(instruction_address()); ++ nmethod* nm = cb->as_nmethod_or_null(); ++ if (nm != NULL) { ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(x); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)x; ++ break; ++ } ++ } ++ } ++} + -+ // currently unimplemented -+ // Used for storing NULL. All other oop constants should be -+ // stored using routines that take a jobject. -+ void store_heap_oop_null(Address dst); ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} + -+ // This dummy is to prevent a call to store_heap_oop from -+ // converting a zero (linke NULL) into a Register by giving -+ // the compiler two choices it can't resolve ++//------------------------------------------------------------------- + -+ void store_heap_oop(Address dst, void* dummy); ++int NativeMovRegMem::offset() const { ++ Unimplemented(); ++ return 0; ++} + -+ // Support for NULL-checks -+ // -+ // Generates code that causes a NULL OS exception if the content of reg is NULL. -+ // If the accessed location is M[reg + offset] and the offset is known, provide the -+ // offset. 
No explicit code generateion is needed if the offset is within a certain -+ // range (0 <= offset <= page_size). ++void NativeMovRegMem::set_offset(int x) { Unimplemented(); } + -+ virtual void null_check(Register reg, int offset = -1); -+ static bool needs_explicit_null_check(intptr_t offset); -+ static bool uses_implicit_null_check(void* address); ++void NativeMovRegMem::verify() { ++ Unimplemented(); ++} + -+ // idiv variant which deals with MINLONG as dividend and -1 as divisor -+ int corrected_idivl(Register result, Register rs1, Register rs2, -+ bool want_remainder); -+ int corrected_idivq(Register result, Register rs1, Register rs2, -+ bool want_remainder); ++//-------------------------------------------------------------------------------- + -+ // interface method calling -+ void lookup_interface_method(Register recv_klass, -+ Register intf_klass, -+ RegisterOrConstant itable_index, -+ Register method_result, -+ Register scan_tmp, -+ Label& no_such_interface, -+ bool return_method = true); ++void NativeJump::verify() { } + -+ // virtual method calling -+ // n.n. x86 allows RegisterOrConstant for vtable_index -+ void lookup_virtual_method(Register recv_klass, -+ RegisterOrConstant vtable_index, -+ Register method_result); + -+ // Form an addres from base + offset in Rd. Rd my or may not -+ // actually be used: you must use the Address that is returned. It -+ // is up to you to ensure that the shift provided mathces the size -+ // of your data. -+ Address form_address(Register Rd, Register base, long byte_offset); ++void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { ++} + -+ // allocation -+ void tlab_allocate( -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Register tmp2, // temp register -+ Label& slow_case, // continuation point of fast allocation fails -+ bool is_far = false -+ ); + -+ void eden_allocate( -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp, // temp register -+ Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false -+ ); ++address NativeJump::jump_destination() const { ++ address dest = MacroAssembler::target_addr_for_insn(instruction_address()); + -+ // Test sub_klass against super_klass, with fast and slow paths. ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about + -+ // The fast path produces a tri-state answer: yes / no / maybe-slow. -+ // One of the three labels can be NULL, meaning take the fall-through. -+ // If super_check_offset is -1, the value is loaded up from super_klass. -+ // No registers are killed, except tmp_reg -+ void check_klass_subtype_fast_path(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label* L_success, -+ Label* L_failure, -+ Label* L_slow_path, -+ Register super_check_offset = noreg); ++ // return -1 if jump to self ++ dest = (dest == (address) this) ? (address) -1 : dest; ++ return dest; ++}; + -+ // The reset of the type cehck; must be wired to a corresponding fast path. 
-+ // It does not repeat the fast path logic, so don't use it standalone. -+ // The tmp1_reg and tmp2_reg can be noreg, if no temps are avaliable. -+ // Updates the sub's secondary super cache as necessary. -+ void check_klass_subtype_slow_path(Register sub_klass, -+ Register super_klass, -+ Register tmp1_reg, -+ Register tmp2_reg, -+ Label* L_success, -+ Label* L_failure); ++void NativeJump::set_jump_destination(address dest) { ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about ++ if (dest == (address) -1) ++ dest = instruction_address(); + -+ void check_klass_subtype(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label& L_success); ++ MacroAssembler::pd_patch_instruction(instruction_address(), dest); ++ ICache::invalidate_range(instruction_address(), instruction_size); ++} + -+ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++//------------------------------------------------------------------- + -+ // only if +VerifyOops -+ void verify_oop(Register reg, const char* s = "broken oop"); -+ void verify_oop_addr(Address addr, const char* s = "broken oop addr"); ++address NativeGeneralJump::jump_destination() const { ++ NativeMovConstReg* move = nativeMovConstReg_at(instruction_address()); ++ address dest = (address) move->data(); + -+ void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {} -+ void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {} ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about + -+#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) -+#define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ // return -1 if jump to self ++ dest = (dest == (address) this) ? (address) -1 : dest; ++ return dest; ++} + -+ // A more convenient access to fence for our purposes -+ // We used four bit to indicate the read and write bits in the predecessors and successors, -+ // and extended i for r, o for w if UseConservativeFence enabled. -+ enum Membar_mask_bits { -+ StoreStore = 0b0101, // (pred = ow + succ = ow) -+ LoadStore = 0b1001, // (pred = ir + succ = ow) -+ StoreLoad = 0b0110, // (pred = ow + succ = ir) -+ LoadLoad = 0b1010, // (pred = ir + succ = ir) -+ AnyAny = LoadStore | StoreLoad // (pred = iorw + succ = iorw) -+ }; ++//------------------------------------------------------------------- + -+ void membar(uint32_t order_constraint); ++bool NativeInstruction::is_safepoint_poll() { ++ return is_lwu_to_zr(address(this)); ++} + -+ static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) { -+ predecessor = (order_constraint >> 2) & 0x3; -+ successor = order_constraint & 0x3; ++bool NativeInstruction::is_lwu_to_zr(address instr) { ++ assert_cond(instr != NULL); ++ return (extract_opcode(instr) == 0b0000011 && ++ extract_funct3(instr) == 0b110 && ++ extract_rd(instr) == zr); // zr ++} + -+ // extend rw -> iorw: -+ // 01(w) -> 0101(ow) -+ // 10(r) -> 1010(ir) -+ // 11(rw)-> 1111(iorw) -+ if (UseConservativeFence) { -+ predecessor |= predecessor << 2; -+ successor |= successor << 2; -+ } -+ } ++// A 16-bit instruction with all bits ones is permanently reserved as an illegal instruction. 
++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ // jvmci ++ return uint_at(0) == 0xffffffff; ++} + -+ static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) { -+ return ((predecessor & 0x3) << 2) | (successor & 0x3); -+ } ++void NativeIllegalInstruction::insert(address code_pos) { ++ assert_cond(code_pos != NULL); ++ *(juint*)code_pos = 0xffffffff; // all bits ones is permanently reserved as an illegal instruction ++} + -+ // prints msg, dumps registers and stops execution -+ void stop(const char* msg); ++bool NativeInstruction::is_stop() { ++ return uint_at(0) == 0xffffffff; // an illegal instruction ++} + -+ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++//------------------------------------------------------------------- + -+ void unimplemented(const char* what = ""); ++// MT-safe inserting of a jump over a jump or a nop (used by ++// nmethod::make_not_entrant_or_zombie) + -+ void should_not_reach_here() { stop("should not reach here"); } ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { + -+ static address target_addr_for_insn(address insn_addr); ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); + -+ // Required platform-specific helpers for Label::patch_instructions. -+ // They _shadow_ the declarations in AbstractAssembler, which are undefined. -+ static int pd_patch_instruction_size(address branch, address target); -+ static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) { -+ pd_patch_instruction_size(branch, target); -+ } -+ static address pd_call_destination(address branch) { -+ return target_addr_for_insn(branch); ++ assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() || ++ nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(), ++ "riscv cannot replace non-jump with jump"); ++ ++ // Patch this nmethod atomically. ++ if (Assembler::reachable_from_branch_at(verified_entry, dest)) { ++ ptrdiff_t offset = dest - verified_entry; ++ guarantee(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction."); // 1M ++ ++ uint32_t insn = 0; ++ address pInsn = (address)&insn; ++ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); ++ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); ++ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); ++ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); ++ Assembler::patch(pInsn, 11, 7, 0); // zero, no link jump ++ Assembler::patch(pInsn, 6, 0, 0b1101111); // j, (jal x0 offset) ++ *(unsigned int*)verified_entry = insn; ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie. 
++ NativeIllegalInstruction::insert(verified_entry); + } + -+ static int patch_oop(address insn_addr, address o); -+ address emit_trampoline_stub(int insts_call_instruction_offset, address target); -+ void emit_static_call_stub(); ++ ICache::invalidate_range(verified_entry, instruction_size); ++} + -+ // The following 4 methods return the offset of the appropriate move instruction ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ CodeBuffer cb(code_pos, instruction_size); ++ MacroAssembler a(&cb); + -+ // Support for fast byte/short loading with zero extension (depending on particular CPU) -+ int load_unsigned_byte(Register dst, Address src); -+ int load_unsigned_short(Register dst, Address src); ++ int32_t offset = 0; ++ a.movptr_with_offset(t0, entry, offset); // lui, addi, slli, addi, slli ++ a.jalr(x0, t0, offset); // jalr + -+ // Support for fast byte/short loading with sign extension (depending on particular CPU) -+ int load_signed_byte(Register dst, Address src); -+ int load_signed_short(Register dst, Address src); ++ ICache::invalidate_range(code_pos, instruction_size); ++} + -+ // Load and store values by size and signed-ness -+ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); -+ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++// MT-safe patching of a long jump instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ ShouldNotCallThis(); ++} + -+ public: -+ // Standard pseudoinstruction -+ void nop(); -+ void mv(Register Rd, Register Rs); -+ void notr(Register Rd, Register Rs); -+ void neg(Register Rd, Register Rs); -+ void negw(Register Rd, Register Rs); -+ void sext_w(Register Rd, Register Rs); -+ void zext_b(Register Rd, Register Rs); -+ void seqz(Register Rd, Register Rs); // set if = zero -+ void snez(Register Rd, Register Rs); // set if != zero -+ void sltz(Register Rd, Register Rs); // set if < zero -+ void sgtz(Register Rd, Register Rs); // set if > zero + -+ // Float pseudoinstruction -+ void fmv_s(FloatRegister Rd, FloatRegister Rs); -+ void fabs_s(FloatRegister Rd, FloatRegister Rs); // single-precision absolute value -+ void fneg_s(FloatRegister Rd, FloatRegister Rs); ++address NativeCallTrampolineStub::destination(nmethod *nm) const { ++ return ptr_at(data_offset); ++} + -+ // Double pseudoinstruction -+ void fmv_d(FloatRegister Rd, FloatRegister Rs); -+ void fabs_d(FloatRegister Rd, FloatRegister Rs); -+ void fneg_d(FloatRegister Rd, FloatRegister Rs); ++void NativeCallTrampolineStub::set_destination(address new_destination) { ++ set_ptr_at(data_offset, new_destination); ++ OrderAccess::release(); ++} + -+ // Pseudoinstruction for control and status register -+ void rdinstret(Register Rd); // read instruction-retired counter -+ void rdcycle(Register Rd); // read cycle counter -+ void rdtime(Register Rd); // read time -+ void csrr(Register Rd, unsigned csr); // read csr -+ void csrw(unsigned csr, Register Rs); // write csr -+ void csrs(unsigned csr, Register Rs); // set bits in csr -+ void csrc(unsigned csr, Register Rs); // clear bits in csr -+ void csrwi(unsigned csr, unsigned imm); -+ void csrsi(unsigned csr, unsigned imm); -+ void csrci(unsigned csr, unsigned imm); -+ void frcsr(Register Rd); // read float-point csr -+ void fscsr(Register Rd, Register Rs); // swap float-point csr -+ void fscsr(Register Rs); // write float-point csr -+ void frrm(Register Rd); // read 
float-point rounding mode -+ void fsrm(Register Rd, Register Rs); // swap float-point rounding mode -+ void fsrm(Register Rs); // write float-point rounding mode -+ void fsrmi(Register Rd, unsigned imm); -+ void fsrmi(unsigned imm); -+ void frflags(Register Rd); // read float-point exception flags -+ void fsflags(Register Rd, Register Rs); // swap float-point exception flags -+ void fsflags(Register Rs); // write float-point exception flags -+ void fsflagsi(Register Rd, unsigned imm); -+ void fsflagsi(unsigned imm); -+ -+ void beqz(Register Rs, const address &dest); -+ void bnez(Register Rs, const address &dest); -+ void blez(Register Rs, const address &dest); -+ void bgez(Register Rs, const address &dest); -+ void bltz(Register Rs, const address &dest); -+ void bgtz(Register Rs, const address &dest); -+ void la(Register Rd, Label &label); -+ void la(Register Rd, const address &dest); -+ void la(Register Rd, const Address &adr); -+ //label -+ void beqz(Register Rs, Label &l, bool is_far = false); -+ void bnez(Register Rs, Label &l, bool is_far = false); -+ void blez(Register Rs, Label &l, bool is_far = false); -+ void bgez(Register Rs, Label &l, bool is_far = false); -+ void bltz(Register Rs, Label &l, bool is_far = false); -+ void bgtz(Register Rs, Label &l, bool is_far = false); -+ void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ -+ void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } } -+ void pop_reg(RegSet regs, Register stack) { if (regs.bits()) { pop_reg(regs.bits(), stack); } } -+ void push_reg(Register Rs); -+ void pop_reg(Register Rd); -+ int push_reg(unsigned int bitset, Register stack); -+ int pop_reg(unsigned int bitset, Register stack); -+ void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } -+ void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } -+#ifdef COMPILER2 -+ void push_vp(VectorRegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); } -+ void pop_vp(VectorRegSet regs, Register stack) { if (regs.bits()) pop_vp(regs.bits(), stack); } -+#endif // COMPILER2 ++uint32_t NativeMembar::get_kind() { ++ uint32_t insn = 
uint_at(0); + -+ // Push and pop everything that might be clobbered by a native -+ // runtime call except t0 and t1. (They are always -+ // temporary registers, so we don't have to protect them.) -+ // Additional registers can be excluded in a passed RegSet. -+ void push_call_clobbered_registers_except(RegSet exclude); -+ void pop_call_clobbered_registers_except(RegSet exclude); ++ uint32_t predecessor = Assembler::extract(insn, 27, 24); ++ uint32_t successor = Assembler::extract(insn, 23, 20); + -+ void push_call_clobbered_registers() { -+ push_call_clobbered_registers_except(RegSet()); -+ } -+ void pop_call_clobbered_registers() { -+ pop_call_clobbered_registers_except(RegSet()); -+ } ++ return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor); ++} + -+ void pusha(); -+ void popa(); -+ void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0); -+ void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0); ++void NativeMembar::set_kind(uint32_t order_kind) { ++ uint32_t predecessor = 0; ++ uint32_t successor = 0; + -+ // if heap base register is used - reinit it with the correct value -+ void reinit_heapbase(); ++ MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor); + -+ void bind(Label& L) { -+ Assembler::bind(L); -+ // fences across basic blocks should not be merged -+ code()->clear_last_insn(); -+ } ++ uint32_t insn = uint_at(0); ++ address pInsn = (address) &insn; ++ Assembler::patch(pInsn, 27, 24, predecessor); ++ Assembler::patch(pInsn, 23, 20, successor); + -+ // mv -+ template::value)> -+ inline void mv(Register Rd, T o) { -+ li(Rd, (int64_t)o); -+ } ++ address membar = addr_at(0); ++ *(unsigned int*) membar = insn; ++} +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +new file mode 100644 +index 0000000000..2e5c84ee3b +--- /dev/null ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +@@ -0,0 +1,555 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } ++#ifndef CPU_RISCV_NATIVEINST_RISCV_HPP ++#define CPU_RISCV_NATIVEINST_RISCV_HPP + -+ void mv(Register Rd, Address dest); -+ void mv(Register Rd, address addr); -+ void mv(Register Rd, RegisterOrConstant src); ++#include "asm/assembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" + -+ // logic -+ void andrw(Register Rd, Register Rs1, Register Rs2); -+ void orrw(Register Rd, Register Rs1, Register Rs2); -+ void xorrw(Register Rd, Register Rs1, Register Rs2); ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovRegMem ++// - - NativeJump ++// - - NativeGeneralJump ++// - - NativeIllegalInstruction ++// - - NativeCallTrampolineStub ++// - - NativeMembar + -+ // revb -+ void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend -+ void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend -+ void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend -+ void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend -+ void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower -+ void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword -+ void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word -+ void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. 
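// --- Editorial sketch, not part of this patch -------------------------------
// Several of the pattern checks in this header (is_movptr_at and
// check_movptr_data_dependency below) recognize the sequence
//   lui; addi; slli 11; addi; slli 6; addi/jalr/load
// used to materialize a wide (typically 48-bit) address constant. The sketch
// below only mirrors that sequence arithmetically; how movptr() actually
// splits a given target into these immediates (including addi sign-extension
// handling) is not shown in this hunk and is not assumed here. The sample
// values are arbitrary.
#include <cstdint>
#include <cstdio>

static int64_t movptr_compose(int64_t hi20, int64_t a1, int64_t a2, int64_t a3) {
  int64_t v = hi20 << 12;  // lui places its immediate at bit 12
  v += a1;                 // addi
  v <<= 11;                // slli 11 (the shift checked by is_slli_shift_at(.., 11))
  v += a2;                 // addi
  v <<= 6;                 // slli 6  (the shift checked by is_slli_shift_at(.., 6))
  v += a3;                 // final addi, or folded into a jalr/load offset
  return v;
}

int main() {
  std::printf("%#llx\n",
              (unsigned long long)movptr_compose(0x12345, 0x123, 0x456, 0x2a));
  return 0;
}
// -----------------------------------------------------------------------------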
+ -+ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); -+ void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0); -+ void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); ++class NativeCall; + -+ void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail); -+ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail); -+ void cmpxchg(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, bool result_as_bool = false); -+ void cmpxchg_weak(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result); -+ void cmpxchg_narrow_value_helper(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Register tmp1, Register tmp2, Register tmp3); -+ void cmpxchg_narrow_value(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, bool result_as_bool, -+ Register tmp1, Register tmp2, Register tmp3); -+ void weak_cmpxchg_narrow_value(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, -+ Register tmp1, Register tmp2, Register tmp3); ++class NativeInstruction { ++ friend class Relocation; ++ friend bool is_NativeCallTrampolineStub_at(address); ++ public: ++ enum { ++ instruction_size = 4, ++ compressed_instruction_size = 2, ++ }; + -+ void atomic_add(Register prev, RegisterOrConstant incr, Register addr); -+ void atomic_addw(Register prev, RegisterOrConstant incr, Register addr); -+ void atomic_addal(Register prev, RegisterOrConstant incr, Register addr); -+ void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr); ++ juint encoding() const { ++ return uint_at(0); ++ } + -+ void atomic_xchg(Register prev, Register newv, Register addr); -+ void atomic_xchgw(Register prev, Register newv, Register addr); -+ void atomic_xchgal(Register prev, Register newv, Register addr); -+ void atomic_xchgalw(Register prev, Register newv, Register addr); -+ void atomic_xchgwu(Register prev, Register newv, Register addr); -+ void atomic_xchgalwu(Register prev, Register newv, Register addr); ++ bool is_jal() const { return is_jal_at(addr_at(0)); } ++ bool is_movptr() const { return is_movptr_at(addr_at(0)); } ++ bool is_call() const { return is_call_at(addr_at(0)); } ++ bool is_jump() const { return is_jump_at(addr_at(0)); } + -+ static bool far_branches() { -+ return ReservedCodeCacheSize > branch_range; ++ static bool is_jal_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1101111; } ++ static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; } ++ static bool is_branch_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100011; } ++ static bool is_ld_at(address instr) { assert_cond(instr != NULL); return is_load_at(instr) && extract_funct3(instr) == 0b011; } ++ static bool is_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000011; } ++ static bool is_float_load_at(address instr) { assert_cond(instr != NULL); return 
extract_opcode(instr) == 0b0000111; } ++ static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010111; } ++ static bool is_jump_at(address instr) { assert_cond(instr != NULL); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); } ++ static bool is_addi_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; } ++ static bool is_addiw_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; } ++ static bool is_lui_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0110111; } ++ static bool is_slli_shift_at(address instr, uint32_t shift) { ++ assert_cond(instr != NULL); ++ return (extract_opcode(instr) == 0b0010011 && // opcode field ++ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation ++ Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field + } + -+ // Jumps that can reach anywhere in the code cache. -+ // Trashes tmp. -+ void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); -+ void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); ++ static Register extract_rs1(address instr); ++ static Register extract_rs2(address instr); ++ static Register extract_rd(address instr); ++ static uint32_t extract_opcode(address instr); ++ static uint32_t extract_funct3(address instr); + -+ static int far_branch_size() { -+ if (far_branches()) { -+ return 2 * 4; // auipc + jalr, see far_call() & far_jump() -+ } else { -+ return 4; -+ } ++ // the instruction sequence of movptr is as below: ++ // lui ++ // addi ++ // slli ++ // addi ++ // slli ++ // addi/jalr/load ++ static bool check_movptr_data_dependency(address instr) { ++ address lui = instr; ++ address addi1 = lui + instruction_size; ++ address slli1 = addi1 + instruction_size; ++ address addi2 = slli1 + instruction_size; ++ address slli2 = addi2 + instruction_size; ++ address last_instr = slli2 + instruction_size; ++ return extract_rs1(addi1) == extract_rd(lui) && ++ extract_rs1(addi1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(slli2) && ++ extract_rs1(last_instr) == extract_rd(slli2); + } + -+ void load_byte_map_base(Register reg); -+ -+ void bang_stack_with_offset(int offset) { -+ // stack grows down, caller passes positive offset -+ assert(offset > 0, "must bang with negative offset"); -+ sub(t0, sp, offset); -+ sd(zr, Address(t0)); ++ // the instruction sequence of li64 is as below: ++ // lui ++ // addi ++ // slli ++ // addi ++ // slli ++ // addi ++ // slli ++ // addi ++ static bool check_li64_data_dependency(address instr) { ++ address lui = instr; ++ address addi1 = lui + instruction_size; ++ address slli1 = addi1 + instruction_size; ++ address addi2 = slli1 + instruction_size; ++ address slli2 = addi2 + instruction_size; ++ address addi3 = slli2 + instruction_size; ++ address slli3 = addi3 + instruction_size; ++ address addi4 = slli3 + instruction_size; ++ return extract_rs1(addi1) == extract_rd(lui) && ++ extract_rs1(addi1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(slli1) && ++ 
extract_rs1(addi2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(slli2) && ++ extract_rs1(addi3) == extract_rd(slli2) && ++ extract_rs1(addi3) == extract_rd(addi3) && ++ extract_rs1(slli3) == extract_rd(addi3) && ++ extract_rs1(slli3) == extract_rd(slli3) && ++ extract_rs1(addi4) == extract_rd(slli3) && ++ extract_rs1(addi4) == extract_rd(addi4); + } + -+ void la_patchable(Register reg1, const Address &dest, int32_t &offset); ++ // the instruction sequence of li32 is as below: ++ // lui ++ // addiw ++ static bool check_li32_data_dependency(address instr) { ++ address lui = instr; ++ address addiw = lui + instruction_size; + -+ virtual void _call_Unimplemented(address call_site) { -+ mv(t1, call_site); ++ return extract_rs1(addiw) == extract_rd(lui) && ++ extract_rs1(addiw) == extract_rd(addiw); + } + -+ #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) ++ // the instruction sequence of pc-relative is as below: ++ // auipc ++ // jalr/addi/load/float_load ++ static bool check_pc_relative_data_dependency(address instr) { ++ address auipc = instr; ++ address last_instr = auipc + instruction_size; + -+ // Frame creation and destruction shared between JITs. -+ void build_frame(int framesize); -+ void remove_frame(int framesize); ++ return extract_rs1(last_instr) == extract_rd(auipc); ++ } + -+ void reserved_stack_check(); ++ // the instruction sequence of load_label is as below: ++ // auipc ++ // load ++ static bool check_load_pc_relative_data_dependency(address instr) { ++ address auipc = instr; ++ address load = auipc + instruction_size; + -+ void get_polling_page(Register dest, relocInfo::relocType rtype); -+ address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); ++ return extract_rd(load) == extract_rd(auipc) && ++ extract_rs1(load) == extract_rd(load); ++ } + -+ address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); -+ address ic_call(address entry, jint method_index = 0); ++ static bool is_movptr_at(address instr); ++ static bool is_li32_at(address instr); ++ static bool is_li64_at(address instr); ++ static bool is_pc_relative_at(address branch); ++ static bool is_load_pc_relative_at(address branch); + -+ void add_memory_int64(const Address dst, int64_t imm); -+ void add_memory_int32(const Address dst, int32_t imm); ++ static bool is_call_at(address instr) { ++ if (is_jal_at(instr) || is_jalr_at(instr)) { ++ return true; ++ } ++ return false; ++ } ++ static bool is_lwu_to_zr(address instr); + -+ void cmpptr(Register src1, Address src2, Label& equal); ++ inline bool is_nop(); ++ inline bool is_jump_or_nop(); ++ bool is_safepoint_poll(); ++ bool is_sigill_zombie_not_entrant(); ++ bool is_stop(); + -+ void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); -+ void load_method_holder_cld(Register result, Register method); -+ void load_method_holder(Register holder, Register method); ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } + -+ void compute_index(Register str1, Register trailing_zeros, Register match_mask, -+ Register result, Register char_tmp, Register tmp, -+ bool haystack_isL); -+ void compute_match_mask(Register src, Register pattern, Register match_mask, -+ Register mask1, Register mask2); ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } + -+#ifdef COMPILER2 -+ void 
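// --- Editorial sketch, not part of this patch -------------------------------
// check_pc_relative_data_dependency / check_load_pc_relative_data_dependency
// above recognize an auipc paired with a dependent addi/jalr/load. The address
// such a pair refers to is pc + sign_extend(imm20 << 12) + sign_extend(imm12).
// A standalone illustration of that arithmetic; sext() is a local helper, not
// a HotSpot API, and the sample values are arbitrary.
#include <cstdint>
#include <cassert>

static int64_t sext(uint64_t x, int bits) {
  // Sign-extend the low `bits` bits of x.
  return (int64_t)(x << (64 - bits)) >> (64 - bits);
}

static int64_t pc_relative_target(int64_t pc, uint32_t imm20, uint32_t imm12) {
  return pc + sext((uint64_t)imm20 << 12, 32)  // auipc contribution
            + sext(imm12, 12);                 // addi/load/jalr offset
}

int main() {
  // imm20 = 1 (one 4 KiB page up), imm12 = 0xff8 encodes -8.
  assert(pc_relative_target(0x4000, 1, 0xff8) == 0x4000 + 0x1000 - 8);
  return 0;
}
// -----------------------------------------------------------------------------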
mul_add(Register out, Register in, Register offset, -+ Register len, Register k, Register tmp); -+ void cad(Register dst, Register src1, Register src2, Register carry); -+ void cadc(Register dst, Register src1, Register src2, Register carry); -+ void adc(Register dst, Register src1, Register src2, Register carry); -+ void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, -+ Register src1, Register src2, Register carry); -+ void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx); -+ void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx); -+ void multiply_128_x_128_loop(Register y, Register z, -+ Register carry, Register carry2, -+ Register idx, Register jdx, -+ Register yz_idx1, Register yz_idx2, -+ Register tmp, Register tmp3, Register tmp4, -+ Register tmp6, Register product_hi); -+ void multiply_to_len(Register x, Register xlen, Register y, Register ylen, -+ Register z, Register zlen, -+ Register tmp1, Register tmp2, Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, Register product_hi); -+#endif ++ address ptr_at(int offset) const { return *(address*) addr_at(offset); } + -+ void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); -+ void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } + -+ void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1); + -+ void zero_words(Register base, u_int64_t cnt); -+ address zero_words(Register ptr, Register cnt); -+ void fill_words(Register base, Register cnt, Register value); -+ void zero_memory(Register addr, Register len, Register tmp); ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; } ++ void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; } ++ void set_ptr_at (int offset, address ptr) { *(address*) addr_at(offset) = ptr; } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; } + -+ // shift left by shamt and add -+ void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt); ++ public: + -+ // Here the float instructions with safe deal with some exceptions. -+ // e.g. convert from NaN, +Inf, -Inf to int, float, double -+ // will trigger exception, we need to deal with these situations -+ // to get correct results. 
-+ void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0); -+ void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0); -+ void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0); -+ void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0); ++ inline friend NativeInstruction* nativeInstruction_at(address addr); + -+ // vector load/store unit-stride instructions -+ void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { -+ switch (sew) { -+ case Assembler::e64: -+ vle64_v(vd, base, vm); -+ break; -+ case Assembler::e32: -+ vle32_v(vd, base, vm); -+ break; -+ case Assembler::e16: -+ vle16_v(vd, base, vm); -+ break; -+ case Assembler::e8: // fall through -+ default: -+ vle8_v(vd, base, vm); -+ break; -+ } ++ static bool maybe_cpool_ref(address instr) { ++ return is_auipc_at(instr); + } + -+ void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { -+ switch (sew) { -+ case Assembler::e64: -+ vse64_v(store_data, base, vm); -+ break; -+ case Assembler::e32: -+ vse32_v(store_data, base, vm); -+ break; -+ case Assembler::e16: -+ vse16_v(store_data, base, vm); -+ break; -+ case Assembler::e8: // fall through -+ default: -+ vse8_v(store_data, base, vm); -+ break; -+ } ++ bool is_membar() { ++ return (uint_at(0) & 0x7f) == 0b1111 && extract_funct3(addr_at(0)) == 0; + } ++}; + -+ static const int zero_words_block_size; -+ -+ void cast_primitive_type(BasicType type, Register Rt) { -+ switch (type) { -+ case T_BOOLEAN: -+ sltu(Rt, zr, Rt); -+ break; -+ case T_CHAR : -+ zero_extend(Rt, Rt, 16); -+ break; -+ case T_BYTE : -+ sign_extend(Rt, Rt, 8); -+ break; -+ case T_SHORT : -+ sign_extend(Rt, Rt, 16); -+ break; -+ case T_INT : -+ addw(Rt, Rt, zr); -+ break; -+ case T_LONG : /* nothing to do */ break; -+ case T_VOID : /* nothing to do */ break; -+ case T_FLOAT : /* nothing to do */ break; -+ case T_DOUBLE : /* nothing to do */ break; -+ default: ShouldNotReachHere(); -+ } -+ } ++inline NativeInstruction* nativeInstruction_at(address addr) { ++ return (NativeInstruction*)addr; ++} + -+ // float cmp with unordered_result -+ void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); -+ void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); ++// The natural type of an RISCV instruction is uint32_t ++inline NativeInstruction* nativeInstruction_at(uint32_t *addr) { ++ return (NativeInstruction*)addr; ++} + -+ // Zero/Sign-extend -+ void zero_extend(Register dst, Register src, int bits); -+ void sign_extend(Register dst, Register src, int bits); ++inline NativeCall* nativeCall_at(address addr); ++// The NativeCall is an abstraction for accessing/manipulating native ++// call instructions (used to manipulate inline caches, primitive & ++// DSO calls, etc.). + -+ // compare src1 and src2 and get -1/0/1 in dst. 
-+ // if [src1 > src2], dst = 1; -+ // if [src1 == src2], dst = 0; -+ // if [src1 < src2], dst = -1; -+ void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0); ++class NativeCall: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ instruction_size = 4, ++ instruction_offset = 0, ++ displacement_offset = 0, ++ return_address_offset = 4 ++ }; + -+ int push_fp(unsigned int bitset, Register stack); -+ int pop_fp(unsigned int bitset, Register stack); ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { return addr_at(return_address_offset); } ++ address return_address() const { return addr_at(return_address_offset); } ++ address destination() const; + -+ int push_vp(unsigned int bitset, Register stack); -+ int pop_vp(unsigned int bitset, Register stack); ++ void set_destination(address dest) { ++ assert(is_jal(), "Should be jal instruction!"); ++ intptr_t offset = (intptr_t)(dest - instruction_address()); ++ assert((offset & 0x1) == 0, "bad alignment"); ++ assert(is_imm_in_range(offset, 20, 1), "encoding constraint"); ++ unsigned int insn = 0b1101111; // jal ++ address pInsn = (address)(&insn); ++ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); ++ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); ++ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); ++ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); ++ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra ++ set_int_at(displacement_offset, insn); ++ } + -+ // vext -+ void vmnot_m(VectorRegister vd, VectorRegister vs); -+ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); -+ void vfneg_v(VectorRegister vd, VectorRegister vs); ++ void verify_alignment() {} // do nothing on riscv ++ void verify(); ++ void print(); + -+private: ++ // Creation ++ inline friend NativeCall* nativeCall_at(address addr); ++ inline friend NativeCall* nativeCall_before(address return_address); + -+#ifdef ASSERT -+ // Template short-hand support to clean-up after a failed call to trampoline -+ // call generation (see trampoline_call() below), when a set of Labels must -+ // be reset (before returning). -+ template -+ void reset_labels(Label& lbl, More&... more) { -+ lbl.reset(); reset_labels(more...); -+ } -+ template -+ void reset_labels(Label& lbl) { -+ lbl.reset(); ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - NativeCall::return_address_offset); + } -+#endif -+ void repne_scan(Register addr, Register value, Register count, Register tmp); + -+ // Return true if an address is within the 48-bit RISCV64 address space. -+ bool is_valid_riscv64_address(address addr) { -+ return ((uintptr_t)addr >> 48) == 0; -+ } ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); + -+ void ld_constant(Register dest, const Address &const_addr) { -+ if (NearCpool) { -+ ld(dest, const_addr); -+ } else { -+ int32_t offset = 0; -+ la_patchable(dest, InternalAddress(const_addr.target()), offset); -+ ld(dest, Address(dest, offset)); -+ } -+ } ++ static void replace_mt_safe(address instr_addr, address code_buffer); + -+ int bitset_to_regs(unsigned int bitset, unsigned char* regs); -+ Address add_memory_helper(const Address dst); ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. 
If the call is an immediate BL ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. + -+ void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); -+ void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) ++ ++ // The parameter assert_lock disables the assertion during code generation. ++ void set_destination_mt_safe(address dest, bool assert_lock = true); + -+ // Check the current thread doesn't need a cross modify fence. -+ void verify_cross_modify_fence_not_required() PRODUCT_RETURN; ++ address get_trampoline(); +}; + -+#ifdef ASSERT -+inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } -+#endif ++inline NativeCall* nativeCall_at(address addr) { ++ assert_cond(addr != NULL); ++ NativeCall* call = (NativeCall*)(addr - NativeCall::instruction_offset); ++ DEBUG_ONLY(call->verify()); ++ return call; ++} + -+/** -+ * class SkipIfEqual: -+ * -+ * Instantiating this class will result in assembly code being output that will -+ * jump around any code emitted between the creation of the instance and it's -+ * automatic destruction at the end of a scope block, depending on the value of -+ * the flag passed to the constructor, which will be checked at run-time. -+ */ -+class SkipIfEqual { -+ private: -+ MacroAssembler* _masm; -+ Label _label; ++inline NativeCall* nativeCall_before(address return_address) { ++ assert_cond(return_address != NULL); ++ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); ++ DEBUG_ONLY(call->verify()); ++ return call; ++} + ++// An interface for accessing/manipulating native mov reg, imm instructions. ++// (used to manipulate inlined 64-bit data calls, etc.) ++class NativeMovConstReg: public NativeInstruction { + public: -+ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); -+ ~SkipIfEqual(); ++ enum RISCV_specific_constants { ++ movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). ++ movptr_with_offset_instruction_size = 5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset(). ++ load_pc_relative_instruction_size = 2 * NativeInstruction::instruction_size, // auipc, ld ++ instruction_offset = 0, ++ displacement_offset = 0 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { ++ // if the instruction at 5 * instruction_size is addi, ++ // it means a lui + addi + slli + addi + slli + addi instruction sequence, ++ // and the next instruction address should be addr_at(6 * instruction_size). 
++ // However, when the instruction at 5 * instruction_size isn't addi, ++ // the next instruction address should be addr_at(5 * instruction_size) ++ if (nativeInstruction_at(instruction_address())->is_movptr()) { ++ if (is_addi_at(addr_at(movptr_with_offset_instruction_size))) { ++ // Assume: lui, addi, slli, addi, slli, addi ++ return addr_at(movptr_instruction_size); ++ } else { ++ // Assume: lui, addi, slli, addi, slli ++ return addr_at(movptr_with_offset_instruction_size); ++ } ++ } else if (is_load_pc_relative_at(instruction_address())) { ++ // Assume: auipc, ld ++ return addr_at(load_pc_relative_instruction_size); ++ } ++ guarantee(false, "Unknown instruction in NativeMovConstReg"); ++ return NULL; ++ } ++ ++ intptr_t data() const; ++ void set_data(intptr_t x); ++ ++ void flush() { ++ if (!maybe_cpool_ref(instruction_address())) { ++ ICache::invalidate_range(instruction_address(), movptr_instruction_size); ++ } ++ } ++ ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address addr); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address addr); +}; + -+#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp -new file mode 100644 -index 00000000000..ef968ccd96d ---- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp -@@ -0,0 +1,31 @@ -+/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++inline NativeMovConstReg* nativeMovConstReg_at(address addr) { ++ assert_cond(addr != NULL); ++ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_offset); ++ DEBUG_ONLY(test->verify()); ++ return test; ++} + -+#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP -+#define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP ++inline NativeMovConstReg* nativeMovConstReg_before(address addr) { ++ assert_cond(addr != NULL); ++ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++ DEBUG_ONLY(test->verify()); ++ return test; ++} + -+// Still empty. ++// RISCV should not use C1 runtime patching, but still implement ++// NativeMovRegMem to keep some compilers happy. 
++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ instruction_size = NativeInstruction::instruction_size, ++ instruction_offset = 0, ++ data_offset = 0, ++ next_instruction_offset = NativeInstruction::instruction_size ++ }; + -+#endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP -diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp -new file mode 100644 -index 00000000000..23a75d20502 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp -@@ -0,0 +1,169 @@ -+/* -+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ int instruction_start() const { return instruction_offset; } + -+#ifndef CPU_RISCV_MATCHER_RISCV_HPP -+#define CPU_RISCV_MATCHER_RISCV_HPP ++ address instruction_address() const { return addr_at(instruction_offset); } + -+ // Defined within class Matcher ++ int num_bytes_to_end_of_patch() const { return instruction_offset + instruction_size; } + -+ // false => size gets scaled to BytesPerLong, ok. -+ static const bool init_array_count_is_in_bytes = false; ++ int offset() const; + -+ // Whether this platform implements the scalable vector feature -+ static const bool implements_scalable_vector = true; ++ void set_offset(int x); + -+ static const bool supports_scalable_vector() { -+ return UseRVV; ++ void add_offset_in_bytes(int add_offset) { ++ set_offset(offset() + add_offset); + } + -+ // riscv supports misaligned vectors store/load. -+ static constexpr bool misaligned_vectors_ok() { -+ return true; -+ } ++ void verify(); ++ void print(); + -+ // Whether code generation need accurate ConvI2L types. 
-+ static const bool convi2l_type_required = false; ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at(address addr); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at(address addr) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(addr - NativeMovRegMem::instruction_offset); ++ DEBUG_ONLY(test->verify()); ++ return test; ++} ++ ++class NativeJump: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ instruction_size = NativeInstruction::instruction_size, ++ instruction_offset = 0, ++ data_offset = 0, ++ next_instruction_offset = NativeInstruction::instruction_size ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { return addr_at(instruction_size); } ++ address jump_destination() const; ++ void set_jump_destination(address dest); + -+ // Does the CPU require late expand (see block.cpp for description of late expand)? -+ static const bool require_postalloc_expand = false; ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); + -+ // Do we need to mask the count passed to shift instructions or does -+ // the cpu only look at the lower 5/6 bits anyway? -+ static const bool need_masked_shift_count = false; ++ void verify(); + -+ // No support for generic vector operands. -+ static const bool supports_generic_vector_operands = false; ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry); ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry); ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++}; + -+ static constexpr bool isSimpleConstant64(jlong value) { -+ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. -+ // Probably always true, even if a temp register is required. -+ return true; -+ } ++inline NativeJump* nativeJump_at(address addr) { ++ NativeJump* jump = (NativeJump*)(addr - NativeJump::instruction_offset); ++ DEBUG_ONLY(jump->verify()); ++ return jump; ++} + -+ // Use conditional move (CMOVL) -+ static constexpr int long_cmove_cost() { -+ // long cmoves are no more expensive than int cmoves -+ return 0; -+ } ++class NativeGeneralJump: public NativeJump { ++public: ++ enum RISCV_specific_constants { ++ instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr ++ instruction_offset = 0, ++ data_offset = 0, ++ next_instruction_offset = 6 * NativeInstruction::instruction_size // lui, addi, slli, addi, slli, jalr ++ }; + -+ static constexpr int float_cmove_cost() { -+ // float cmoves are no more expensive than int cmoves -+ return 0; -+ } ++ address jump_destination() const; + -+ // This affects two different things: -+ // - how Decode nodes are matched -+ // - how ImplicitNullCheck opportunities are recognized -+ // If true, the matcher will try to remove all Decodes and match them -+ // (as operands) into nodes. NullChecks are not prepared to deal with -+ // Decodes by final_graph_reshaping(). -+ // If false, final_graph_reshaping() forces the decode behind the Cmp -+ // for a NullCheck. The matcher matches the Decode node into a register. -+ // Implicit_null_check optimization moves the Decode along with the -+ // memory operation back up before the NullCheck. 
-+ static bool narrow_oop_use_complex_address() { -+ return CompressedOops::shift() == 0; -+ } ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; + -+ static bool narrow_klass_use_complex_address() { -+ return false; -+ } ++inline NativeGeneralJump* nativeGeneralJump_at(address addr) { ++ assert_cond(addr != NULL); ++ NativeGeneralJump* jump = (NativeGeneralJump*)(addr); ++ debug_only(jump->verify();) ++ return jump; ++} + -+ static bool const_oop_prefer_decode() { -+ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. -+ return CompressedOops::base() == NULL; -+ } ++class NativeIllegalInstruction: public NativeInstruction { ++ public: ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; + -+ static bool const_klass_prefer_decode() { -+ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. -+ return CompressedKlassPointers::base() == NULL; -+ } ++inline bool NativeInstruction::is_nop() { ++ uint32_t insn = *(uint32_t*)addr_at(0); ++ return insn == 0x13; ++} + -+ // Is it better to copy float constants, or load them directly from -+ // memory? Intel can load a float constant from a direct address, -+ // requiring no extra registers. Most RISCs will have to materialize -+ // an address into a register first, so they would do better to copy -+ // the constant from stack. -+ static const bool rematerialize_float_constants = false; ++inline bool NativeInstruction::is_jump_or_nop() { ++ return is_nop() || is_jump(); ++} + -+ // If CPU can load and store mis-aligned doubles directly then no -+ // fixup is needed. Else we split the double into 2 integer pieces -+ // and move it piece-by-piece. Only happens when passing doubles into -+ // C code as the Java calling convention forces doubles to be aligned. -+ static const bool misaligned_doubles_ok = true; ++// Call trampoline stubs. ++class NativeCallTrampolineStub : public NativeInstruction { ++ public: + -+ // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. -+ static const bool strict_fp_requires_explicit_rounding = false; ++ enum RISCV_specific_constants { ++ // Refer to function emit_trampoline_stub. ++ instruction_size = 3 * NativeInstruction::instruction_size + wordSize, // auipc + ld + jr + target address ++ data_offset = 3 * NativeInstruction::instruction_size, // auipc + ld + jr ++ }; + -+ // Are floats converted to double when stored to stack during -+ // deoptimization? -+ static constexpr bool float_in_double() { return false; } ++ address destination(nmethod *nm = NULL) const; ++ void set_destination(address new_destination); ++ ptrdiff_t destination_offset() const; ++}; + -+ // Do ints take an entire long register or just half? -+ // The relevant question is how the int is callee-saved: -+ // the whole long is written but de-opt'ing will have to extract -+ // the relevant 32 bits. -+ static const bool int_in_long = true; ++inline bool is_NativeCallTrampolineStub_at(address addr) { ++ // Ensure that the stub is exactly ++ // ld t0, L--->auipc + ld ++ // jr t0 ++ // L: + -+ // Does the CPU supports vector variable shift instructions? -+ static constexpr bool supports_vector_variable_shifts(void) { -+ return false; ++ // judge inst + register + imm ++ // 1). check the instructions: auipc + ld + jalr ++ // 2). check if auipc[11:7] == t0 and ld[11:7] == t0 and ld[19:15] == t0 && jr[19:15] == t0 ++ // 3). 
check if the offset in ld[31:20] equals the data_offset ++ assert_cond(addr != NULL); ++ const int instr_size = NativeInstruction::instruction_size; ++ if (NativeInstruction::is_auipc_at(addr) && ++ NativeInstruction::is_ld_at(addr + instr_size) && ++ NativeInstruction::is_jalr_at(addr + 2 * instr_size) && ++ (NativeInstruction::extract_rd(addr) == x5) && ++ (NativeInstruction::extract_rd(addr + instr_size) == x5) && ++ (NativeInstruction::extract_rs1(addr + instr_size) == x5) && ++ (NativeInstruction::extract_rs1(addr + 2 * instr_size) == x5) && ++ (Assembler::extract(((unsigned*)addr)[1], 31, 20) == NativeCallTrampolineStub::data_offset)) { ++ return true; + } ++ return false; ++} + -+ // Does the CPU supports vector variable rotate instructions? -+ static constexpr bool supports_vector_variable_rotates(void) { -+ return false; -+ } ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ assert_cond(addr != NULL); ++ assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found"); ++ return (NativeCallTrampolineStub*)addr; ++} + -+ // Does the CPU supports vector constant rotate instructions? -+ static constexpr bool supports_vector_constant_rotates(int shift) { -+ return false; -+ } ++class NativeMembar : public NativeInstruction { ++public: ++ uint32_t get_kind(); ++ void set_kind(uint32_t order_kind); ++}; + -+ // Does the CPU supports vector unsigned comparison instructions? -+ static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { -+ return false; -+ } ++inline NativeMembar *NativeMembar_at(address addr) { ++ assert_cond(addr != NULL); ++ assert(nativeInstruction_at(addr)->is_membar(), "no membar found"); ++ return (NativeMembar*)addr; ++} + -+ // Some microarchitectures have mask registers used on vectors -+ static const bool has_predicated_vectors(void) { -+ return false; -+ } ++#endif // CPU_RISCV_NATIVEINST_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp +new file mode 100644 +index 0000000000..fef8ca9b64 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ // true means we have fast l2f convers -+ // false means that conversion is done by runtime call -+ static constexpr bool convL2FSupported(void) { -+ return true; -+ } ++#ifndef CPU_RISCV_REGISTERMAP_RISCV_HPP ++#define CPU_RISCV_REGISTERMAP_RISCV_HPP + -+ // Implements a variant of EncodeISOArrayNode that encode ASCII only -+ static const bool supports_encode_ascii_array = false; ++// machine-dependent implemention for register maps ++ friend class frame; + -+ // Returns pre-selection estimated size of a vector operation. -+ static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) { -+ return 0; -+ } ++ private: ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ address pd_location(VMReg reg) const { return NULL; } + -+#endif // CPU_RISCV_MATCHER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_RISCV_REGISTERMAP_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp new file mode 100644 -index 00000000000..1f7c0c87c21 +index 0000000000..583f67573c --- /dev/null -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -0,0 +1,461 @@ ++++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp +@@ -0,0 +1,192 @@ +/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
@@ -27648,449 +26846,180 @@ index 00000000000..1f7c0c87c21 + */ + +#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "classfile/javaClasses.inline.hpp" -+#include "classfile/vmClasses.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "memory/allocation.inline.hpp" -+#include "prims/jvmtiExport.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/flags/flagSetting.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/stubRoutines.hpp" -+ -+#define __ _masm-> -+ -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#else -+#define BLOCK_COMMENT(str) __ block_comment(str) -+#endif ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "interp_masm_riscv.hpp" ++#include "register_riscv.hpp" + -+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++REGISTER_DEFINITION(Register, noreg); + -+void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { -+ assert_cond(_masm != NULL); -+ if (VerifyMethodHandles) { -+ verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), -+ "MH argument is a Class"); -+ } -+ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset())); -+} ++REGISTER_DEFINITION(Register, x0); ++REGISTER_DEFINITION(Register, x1); ++REGISTER_DEFINITION(Register, x2); ++REGISTER_DEFINITION(Register, x3); ++REGISTER_DEFINITION(Register, x4); ++REGISTER_DEFINITION(Register, x5); ++REGISTER_DEFINITION(Register, x6); ++REGISTER_DEFINITION(Register, x7); ++REGISTER_DEFINITION(Register, x8); ++REGISTER_DEFINITION(Register, x9); ++REGISTER_DEFINITION(Register, x10); ++REGISTER_DEFINITION(Register, x11); ++REGISTER_DEFINITION(Register, x12); ++REGISTER_DEFINITION(Register, x13); ++REGISTER_DEFINITION(Register, x14); ++REGISTER_DEFINITION(Register, x15); ++REGISTER_DEFINITION(Register, x16); ++REGISTER_DEFINITION(Register, x17); ++REGISTER_DEFINITION(Register, x18); ++REGISTER_DEFINITION(Register, x19); ++REGISTER_DEFINITION(Register, x20); ++REGISTER_DEFINITION(Register, x21); ++REGISTER_DEFINITION(Register, x22); ++REGISTER_DEFINITION(Register, x23); ++REGISTER_DEFINITION(Register, x24); ++REGISTER_DEFINITION(Register, x25); ++REGISTER_DEFINITION(Register, x26); ++REGISTER_DEFINITION(Register, x27); ++REGISTER_DEFINITION(Register, x28); ++REGISTER_DEFINITION(Register, x29); ++REGISTER_DEFINITION(Register, x30); ++REGISTER_DEFINITION(Register, x31); + -+#ifdef ASSERT -+static int check_nonzero(const char* xname, int x) { -+ assert(x != 0, "%s should be nonzero", xname); -+ return x; -+} -+#define NONZERO(x) check_nonzero(#x, x) -+#else //ASSERT -+#define NONZERO(x) (x) -+#endif //PRODUCT ++REGISTER_DEFINITION(FloatRegister, fnoreg); + -+#ifdef ASSERT -+void MethodHandles::verify_klass(MacroAssembler* _masm, -+ Register obj, vmClassID klass_id, -+ const char* error_message) { -+ assert_cond(_masm != NULL); -+ InstanceKlass** klass_addr = vmClasses::klass_addr_at(klass_id); -+ Klass* klass = vmClasses::klass_at(klass_id); -+ Register temp = t1; -+ Register temp2 = t0; // used by MacroAssembler::cmpptr -+ Label L_ok, L_bad; -+ BLOCK_COMMENT("verify_klass {"); -+ __ verify_oop(obj); -+ __ beqz(obj, L_bad); -+ __ push_reg(RegSet::of(temp, temp2), sp); -+ __ load_klass(temp, obj); -+ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); -+ intptr_t super_check_offset = klass->super_check_offset(); -+ __ ld(temp, Address(temp, super_check_offset)); -+ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); -+ __ 
pop_reg(RegSet::of(temp, temp2), sp); -+ __ bind(L_bad); -+ __ stop(error_message); -+ __ BIND(L_ok); -+ __ pop_reg(RegSet::of(temp, temp2), sp); -+ BLOCK_COMMENT("} verify_klass"); -+} -+ -+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {} -+ -+#endif //ASSERT -+ -+void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, -+ bool for_compiler_entry) { -+ assert_cond(_masm != NULL); -+ assert(method == xmethod, "interpreter calling convention"); -+ Label L_no_such_method; -+ __ beqz(xmethod, L_no_such_method); -+ __ verify_method_ptr(method); -+ -+ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { -+ Label run_compiled_code; -+ // JVMTI events, such as single-stepping, are implemented partly by avoiding running -+ // compiled code in threads for which the event is enabled. Check here for -+ // interp_only_mode if these events CAN be enabled. -+ -+ __ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); -+ __ beqz(t0, run_compiled_code); -+ __ ld(t0, Address(method, Method::interpreter_entry_offset())); -+ __ jr(t0); -+ __ BIND(run_compiled_code); -+ } -+ -+ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : -+ Method::from_interpreted_offset(); -+ __ ld(t0,Address(method, entry_offset)); -+ __ jr(t0); -+ __ bind(L_no_such_method); -+ __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry())); -+} -+ -+void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, -+ Register recv, Register method_temp, -+ Register temp2, -+ bool for_compiler_entry) { -+ assert_cond(_masm != NULL); -+ BLOCK_COMMENT("jump_to_lambda_form {"); -+ // This is the initial entry point of a lazy method handle. -+ // After type checking, it picks up the invoker from the LambdaForm. 
-+ assert_different_registers(recv, method_temp, temp2); -+ assert(recv != noreg, "required register"); -+ assert(method_temp == xmethod, "required register for loading method"); -+ -+ // Load the invoker, as MH -> MH.form -> LF.vmentry -+ __ verify_oop(recv); -+ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2); -+ __ verify_oop(method_temp); -+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2); -+ __ verify_oop(method_temp); -+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset())), temp2); -+ __ verify_oop(method_temp); -+ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), noreg, noreg); -+ -+ if (VerifyMethodHandles && !for_compiler_entry) { -+ // make sure recv is already on stack -+ __ ld(temp2, Address(method_temp, Method::const_offset())); -+ __ load_sized_value(temp2, -+ Address(temp2, ConstMethod::size_of_parameters_offset()), -+ sizeof(u2), /*is_signed*/ false); -+ Label L; -+ __ ld(t0, __ argument_address(temp2, -1)); -+ __ beq(recv, t0, L); -+ __ ld(x10, __ argument_address(temp2, -1)); -+ __ ebreak(); -+ __ BIND(L); -+ } -+ -+ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); -+ BLOCK_COMMENT("} jump_to_lambda_form"); -+} -+ -+// Code generation -+address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, -+ vmIntrinsics::ID iid) { -+ assert_cond(_masm != NULL); -+ const bool not_for_compiler_entry = false; // this is the interpreter entry -+ assert(is_signature_polymorphic(iid), "expected invoke iid"); -+ if (iid == vmIntrinsics::_invokeGeneric || -+ iid == vmIntrinsics::_compiledLambdaForm) { -+ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. -+ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. -+ // They all allow an appendix argument. -+ __ ebreak(); // empty stubs make SG sick -+ return NULL; -+ } -+ -+ // No need in interpreter entry for linkToNative for now. -+ // Interpreter calls compiled entry through i2c. -+ if (iid == vmIntrinsics::_linkToNative) { -+ __ ebreak(); -+ return NULL; -+ } -+ -+ // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) -+ // xmethod: Method* -+ // x13: argument locator (parameter slot count, added to sp) -+ // x11: used as temp to hold mh or receiver -+ // x10, x29: garbage temps, blown away -+ Register argp = x13; // argument list ptr, live on error paths -+ Register mh = x11; // MH receiver; dies quickly and is recycled -+ -+ // here's where control starts out: -+ __ align(CodeEntryAlignment); -+ address entry_point = __ pc(); -+ -+ if (VerifyMethodHandles) { -+ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); -+ -+ Label L; -+ BLOCK_COMMENT("verify_intrinsic_id {"); -+ __ lhu(t0, Address(xmethod, Method::intrinsic_id_offset_in_bytes())); -+ __ mv(t1, (int) iid); -+ __ beq(t0, t1, L); -+ if (iid == vmIntrinsics::_linkToVirtual || -+ iid == vmIntrinsics::_linkToSpecial) { -+ // could do this for all kinds, but would explode assembly code size -+ trace_method_handle(_masm, "bad Method*::intrinsic_id"); -+ } -+ __ ebreak(); -+ __ bind(L); -+ BLOCK_COMMENT("} verify_intrinsic_id"); -+ } -+ -+ // First task: Find out how big the argument list is. 
-+ Address x13_first_arg_addr; -+ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); -+ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); -+ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { -+ __ ld(argp, Address(xmethod, Method::const_offset())); -+ __ load_sized_value(argp, -+ Address(argp, ConstMethod::size_of_parameters_offset()), -+ sizeof(u2), /*is_signed*/ false); -+ x13_first_arg_addr = __ argument_address(argp, -1); -+ } else { -+ DEBUG_ONLY(argp = noreg); -+ } -+ -+ if (!is_signature_polymorphic_static(iid)) { -+ __ ld(mh, x13_first_arg_addr); -+ DEBUG_ONLY(argp = noreg); -+ } -+ -+ // x13_first_arg_addr is live! -+ -+ trace_method_handle_interpreter_entry(_masm, iid); -+ if (iid == vmIntrinsics::_invokeBasic) { -+ generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); -+ } else { -+ // Adjust argument list by popping the trailing MemberName argument. -+ Register recv = noreg; -+ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { -+ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. -+ __ ld(recv = x12, x13_first_arg_addr); -+ } -+ DEBUG_ONLY(argp = noreg); -+ Register xmember = xmethod; // MemberName ptr; incoming method ptr is dead now -+ __ pop_reg(xmember); // extract last argument -+ generate_method_handle_dispatch(_masm, iid, recv, xmember, not_for_compiler_entry); -+ } -+ -+ return entry_point; -+} -+ -+ -+void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, -+ vmIntrinsics::ID iid, -+ Register receiver_reg, -+ Register member_reg, -+ bool for_compiler_entry) { -+ assert_cond(_masm != NULL); -+ assert(is_signature_polymorphic(iid), "expected invoke iid"); -+ // temps used in this code are not used in *either* compiled or interpreted calling sequences -+ Register temp1 = x7; -+ Register temp2 = x28; -+ Register temp3 = x29; // x30 is live by this point: it contains the sender SP -+ if (for_compiler_entry) { -+ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); -+ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); -+ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); -+ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); -+ } -+ -+ assert_different_registers(temp1, temp2, temp3, receiver_reg); -+ assert_different_registers(temp1, temp2, temp3, member_reg); -+ -+ if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { -+ if (iid == vmIntrinsics::_linkToNative) { -+ assert(for_compiler_entry, "only compiler entry is supported"); -+ } -+ // indirect through MH.form.vmentry.vmtarget -+ jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); -+ } else { -+ // The method is a member invoker used by direct method handles. 
-+ if (VerifyMethodHandles) { -+ // make sure the trailing argument really is a MemberName (caller responsibility) -+ verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName), -+ "MemberName required for invokeVirtual etc."); -+ } -+ -+ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); -+ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); -+ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); -+ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); -+ -+ Register temp1_recv_klass = temp1; -+ if (iid != vmIntrinsics::_linkToStatic) { -+ __ verify_oop(receiver_reg); -+ if (iid == vmIntrinsics::_linkToSpecial) { -+ // Don't actually load the klass; just null-check the receiver. -+ __ null_check(receiver_reg); -+ } else { -+ // load receiver klass itself -+ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); -+ __ load_klass(temp1_recv_klass, receiver_reg); -+ __ verify_klass_ptr(temp1_recv_klass); -+ } -+ BLOCK_COMMENT("check_receiver {"); -+ // The receiver for the MemberName must be in receiver_reg. -+ // Check the receiver against the MemberName.clazz -+ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { -+ // Did not load it above... -+ __ load_klass(temp1_recv_klass, receiver_reg); -+ __ verify_klass_ptr(temp1_recv_klass); -+ } -+ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { -+ Label L_ok; -+ Register temp2_defc = temp2; -+ __ load_heap_oop(temp2_defc, member_clazz, temp3); -+ load_klass_from_Class(_masm, temp2_defc); -+ __ verify_klass_ptr(temp2_defc); -+ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); -+ // If we get here, the type check failed! -+ __ ebreak(); -+ __ bind(L_ok); -+ } -+ BLOCK_COMMENT("} check_receiver"); -+ } -+ if (iid == vmIntrinsics::_linkToSpecial || -+ iid == vmIntrinsics::_linkToStatic) { -+ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass -+ } -+ -+ // Live registers at this point: -+ // member_reg - MemberName that was the trailing argument -+ // temp1_recv_klass - klass of stacked receiver, if needed -+ // x30 - interpreter linkage (if interpreted) -+ // x11 ... 
x10 - compiler arguments (if compiled) -+ -+ Label L_incompatible_class_change_error; -+ switch (iid) { -+ case vmIntrinsics::_linkToSpecial: -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); -+ } -+ __ load_heap_oop(xmethod, member_vmtarget); -+ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); -+ break; -+ -+ case vmIntrinsics::_linkToStatic: -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); -+ } -+ __ load_heap_oop(xmethod, member_vmtarget); -+ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); -+ break; -+ -+ case vmIntrinsics::_linkToVirtual: -+ { -+ // same as TemplateTable::invokevirtual, -+ // minus the CP setup and profiling: -+ -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); -+ } -+ -+ // pick out the vtable index from the MemberName, and then we can discard it: -+ Register temp2_index = temp2; -+ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); -+ -+ if (VerifyMethodHandles) { -+ Label L_index_ok; -+ __ bgez(temp2_index, L_index_ok); -+ __ ebreak(); -+ __ BIND(L_index_ok); -+ } -+ -+ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget -+ // at this point. And VerifyMethodHandles has already checked clazz, if needed. -+ -+ // get target Method* & entry point -+ __ lookup_virtual_method(temp1_recv_klass, temp2_index, xmethod); -+ break; -+ } -+ -+ case vmIntrinsics::_linkToInterface: -+ { -+ // same as TemplateTable::invokeinterface -+ // (minus the CP setup and profiling, with different argument motion) -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); -+ } -+ -+ Register temp3_intf = temp3; -+ __ load_heap_oop(temp3_intf, member_clazz); -+ load_klass_from_Class(_masm, temp3_intf); -+ __ verify_klass_ptr(temp3_intf); -+ -+ Register rindex = xmethod; -+ __ access_load_at(T_ADDRESS, IN_HEAP, rindex, member_vmindex, noreg, noreg); -+ if (VerifyMethodHandles) { -+ Label L; -+ __ bgez(rindex, L); -+ __ ebreak(); -+ __ bind(L); -+ } ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); 
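(Aside, not part of the patch: a quick cross-reference for the register definitions above. The standard RISC-V ABI names and the HotSpot aliases printed by RegisterImpl::name() in register_riscv.cpp later in this patch line up as sketched below; the standalone program and its kAliases table are invented here purely for illustration.)

#include <cstdio>

// Illustration only: encoding -> RISC-V ABI name -> alias used by the HotSpot
// RISC-V port (the aliases follow the name() table in register_riscv.cpp).
struct RegAlias { int encoding; const char* abi; const char* hotspot; };

static const RegAlias kAliases[] = {
  {  0, "zero",  "zr"      },
  {  1, "ra",    "ra"      },
  {  2, "sp",    "sp"      },
  {  5, "t0",    "t0"      },  // scratch register, heavily used by the port
  {  8, "s0/fp", "fp"      },
  { 10, "a0",    "c_rarg0" },  // first C argument/result register
  { 17, "a7",    "c_rarg7" },
  { 31, "t6",    "xmethod" },  // holds the current Method* in interpreted code
};

int main() {
  for (const RegAlias& r : kAliases) {
    std::printf("x%-2d  %-6s %s\n", r.encoding, r.abi, r.hotspot);
  }
  return 0;
}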
+ -+ // given intf, index, and recv klass, dispatch to the implementation method -+ __ lookup_interface_method(temp1_recv_klass, temp3_intf, -+ // note: next two args must be the same: -+ rindex, xmethod, -+ temp2, -+ L_incompatible_class_change_error); -+ break; -+ } ++REGISTER_DEFINITION(VectorRegister, vnoreg); + -+ default: -+ fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); -+ break; -+ } ++REGISTER_DEFINITION(VectorRegister, v0); ++REGISTER_DEFINITION(VectorRegister, v1); ++REGISTER_DEFINITION(VectorRegister, v2); ++REGISTER_DEFINITION(VectorRegister, v3); ++REGISTER_DEFINITION(VectorRegister, v4); ++REGISTER_DEFINITION(VectorRegister, v5); ++REGISTER_DEFINITION(VectorRegister, v6); ++REGISTER_DEFINITION(VectorRegister, v7); ++REGISTER_DEFINITION(VectorRegister, v8); ++REGISTER_DEFINITION(VectorRegister, v9); ++REGISTER_DEFINITION(VectorRegister, v10); ++REGISTER_DEFINITION(VectorRegister, v11); ++REGISTER_DEFINITION(VectorRegister, v12); ++REGISTER_DEFINITION(VectorRegister, v13); ++REGISTER_DEFINITION(VectorRegister, v14); ++REGISTER_DEFINITION(VectorRegister, v15); ++REGISTER_DEFINITION(VectorRegister, v16); ++REGISTER_DEFINITION(VectorRegister, v17); ++REGISTER_DEFINITION(VectorRegister, v18); ++REGISTER_DEFINITION(VectorRegister, v19); ++REGISTER_DEFINITION(VectorRegister, v20); ++REGISTER_DEFINITION(VectorRegister, v21); ++REGISTER_DEFINITION(VectorRegister, v22); ++REGISTER_DEFINITION(VectorRegister, v23); ++REGISTER_DEFINITION(VectorRegister, v24); ++REGISTER_DEFINITION(VectorRegister, v25); ++REGISTER_DEFINITION(VectorRegister, v26); ++REGISTER_DEFINITION(VectorRegister, v27); ++REGISTER_DEFINITION(VectorRegister, v28); ++REGISTER_DEFINITION(VectorRegister, v29); ++REGISTER_DEFINITION(VectorRegister, v30); ++REGISTER_DEFINITION(VectorRegister, v31); + -+ // live at this point: xmethod, x30 (if interpreted) ++REGISTER_DEFINITION(Register, c_rarg0); ++REGISTER_DEFINITION(Register, c_rarg1); ++REGISTER_DEFINITION(Register, c_rarg2); ++REGISTER_DEFINITION(Register, c_rarg3); ++REGISTER_DEFINITION(Register, c_rarg4); ++REGISTER_DEFINITION(Register, c_rarg5); ++REGISTER_DEFINITION(Register, c_rarg6); ++REGISTER_DEFINITION(Register, c_rarg7); + -+ // After figuring out which concrete method to call, jump into it. -+ // Note that this works in the interpreter with no data motion. -+ // But the compiled version will require that r2_recv be shifted out. 
-+ __ verify_method_ptr(xmethod); -+ jump_from_method_handle(_masm, xmethod, temp1, for_compiler_entry); -+ if (iid == vmIntrinsics::_linkToInterface) { -+ __ bind(L_incompatible_class_change_error); -+ __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); -+ } -+ } ++REGISTER_DEFINITION(FloatRegister, c_farg0); ++REGISTER_DEFINITION(FloatRegister, c_farg1); ++REGISTER_DEFINITION(FloatRegister, c_farg2); ++REGISTER_DEFINITION(FloatRegister, c_farg3); ++REGISTER_DEFINITION(FloatRegister, c_farg4); ++REGISTER_DEFINITION(FloatRegister, c_farg5); ++REGISTER_DEFINITION(FloatRegister, c_farg6); ++REGISTER_DEFINITION(FloatRegister, c_farg7); + -+} ++REGISTER_DEFINITION(Register, j_rarg0); ++REGISTER_DEFINITION(Register, j_rarg1); ++REGISTER_DEFINITION(Register, j_rarg2); ++REGISTER_DEFINITION(Register, j_rarg3); ++REGISTER_DEFINITION(Register, j_rarg4); ++REGISTER_DEFINITION(Register, j_rarg5); ++REGISTER_DEFINITION(Register, j_rarg6); ++REGISTER_DEFINITION(Register, j_rarg7); + -+#ifndef PRODUCT -+void trace_method_handle_stub(const char* adaptername, -+ oopDesc* mh, -+ intptr_t* saved_regs, -+ intptr_t* entry_sp) { } ++REGISTER_DEFINITION(FloatRegister, j_farg0); ++REGISTER_DEFINITION(FloatRegister, j_farg1); ++REGISTER_DEFINITION(FloatRegister, j_farg2); ++REGISTER_DEFINITION(FloatRegister, j_farg3); ++REGISTER_DEFINITION(FloatRegister, j_farg4); ++REGISTER_DEFINITION(FloatRegister, j_farg5); ++REGISTER_DEFINITION(FloatRegister, j_farg6); ++REGISTER_DEFINITION(FloatRegister, j_farg7); + -+// The stub wraps the arguments in a struct on the stack to avoid -+// dealing with the different calling conventions for passing 6 -+// arguments. -+struct MethodHandleStubArguments { -+ const char* adaptername; -+ oopDesc* mh; -+ intptr_t* saved_regs; -+ intptr_t* entry_sp; -+}; -+void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { } ++REGISTER_DEFINITION(Register, zr); ++REGISTER_DEFINITION(Register, gp); ++REGISTER_DEFINITION(Register, tp); ++REGISTER_DEFINITION(Register, xmethod); ++REGISTER_DEFINITION(Register, ra); ++REGISTER_DEFINITION(Register, sp); ++REGISTER_DEFINITION(Register, fp); ++REGISTER_DEFINITION(Register, xheapbase); ++REGISTER_DEFINITION(Register, xcpool); ++REGISTER_DEFINITION(Register, xmonitors); ++REGISTER_DEFINITION(Register, xlocals); ++REGISTER_DEFINITION(Register, xthread); ++REGISTER_DEFINITION(Register, xbcp); ++REGISTER_DEFINITION(Register, xdispatch); ++REGISTER_DEFINITION(Register, esp); + -+void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { } -+#endif //PRODUCT -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp ++REGISTER_DEFINITION(Register, t0); ++REGISTER_DEFINITION(Register, t1); ++REGISTER_DEFINITION(Register, t2); +diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp new file mode 100644 -index 00000000000..f73aba29d67 +index 0000000000..ef60cb3bb0 --- /dev/null -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -@@ -0,0 +1,57 @@ ++++ b/src/hotspot/cpu/riscv/register_riscv.cpp +@@ -0,0 +1,64 @@ +/* -+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -28113,47 +27042,53 @@ index 00000000000..f73aba29d67 + * + */ + -+// Platform-specific definitions for method handles. -+// These definitions are inlined into class MethodHandles. ++#include "precompiled.hpp" ++#include "register_riscv.hpp" + -+// Adapters -+enum /* platform_dependent_constants */ { -+ adapter_code_size = 32000 DEBUG_ONLY(+ 120000) -+}; ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * ++ RegisterImpl::max_slots_per_register; + -+public: ++const int ConcreteRegisterImpl::max_fpr = ++ ConcreteRegisterImpl::max_gpr + ++ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; + -+ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++const char* RegisterImpl::name() const { ++ static const char *const names[number_of_registers] = { ++ "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", ++ "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", ++ "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals", ++ "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} + -+ static void verify_klass(MacroAssembler* _masm, -+ Register obj, vmClassID klass_id, -+ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++const char* FloatRegisterImpl::name() const { ++ static const char *const names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} + -+ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { -+ verify_klass(_masm, mh_reg, VM_CLASS_ID(java_lang_invoke_MethodHandle), -+ "reference is a MH"); -+ } -+ -+ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; -+ -+ // Similar to InterpreterMacroAssembler::jump_from_interpreted. -+ // Takes care of special dispatch from single stepping too. -+ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, -+ bool for_compiler_entry); -+ -+ static void jump_to_lambda_form(MacroAssembler* _masm, -+ Register recv, Register method_temp, -+ Register temp2, -+ bool for_compiler_entry); -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp ++const char* VectorRegisterImpl::name() const { ++ static const char *const names[number_of_registers] = { ++ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", ++ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", ++ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", ++ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} +diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp new file mode 100644 -index 00000000000..0a05c577860 +index 0000000000..f64a06eb89 --- /dev/null -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -@@ -0,0 +1,429 @@ ++++ b/src/hotspot/cpu/riscv/register_riscv.hpp +@@ -0,0 +1,381 @@ +/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. 
-+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -28177,419 +27112,489 @@ index 00000000000..0a05c577860 + * + */ + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "code/compiledIC.hpp" -+#include "memory/resourceArea.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/oop.inline.hpp" -+#include "runtime/handles.hpp" -+#include "runtime/orderAccess.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "utilities/ostream.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_Runtime1.hpp" -+#endif ++#ifndef CPU_RISCV_REGISTER_RISCV_HPP ++#define CPU_RISCV_REGISTER_RISCV_HPP + -+Register NativeInstruction::extract_rs1(address instr) { -+ assert_cond(instr != NULL); -+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 19, 15)); -+} ++#include "asm/register.hpp" + -+Register NativeInstruction::extract_rs2(address instr) { -+ assert_cond(instr != NULL); -+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 24, 20)); -+} ++#define CSR_FFLAGS 0x001 // Floating-Point Accrued Exceptions. ++#define CSR_FRM 0x002 // Floating-Point Dynamic Rounding Mode. ++#define CSR_FCSR 0x003 // Floating-Point Control and Status Register (frm + fflags). ++#define CSR_VSTART 0x008 // Vector start position ++#define CSR_VXSAT 0x009 // Fixed-Point Saturate Flag ++#define CSR_VXRM 0x00A // Fixed-Point Rounding Mode ++#define CSR_VCSR 0x00F // Vector control and status register ++#define CSR_VL 0xC20 // Vector length ++#define CSR_VTYPE 0xC21 // Vector data type register ++#define CSR_VLENB 0xC22 // VLEN/8 (vector register length in bytes) ++#define CSR_CYCLE 0xc00 // Cycle counter for RDCYCLE instruction. ++#define CSR_TIME 0xc01 // Timer for RDTIME instruction. ++#define CSR_INSTERT 0xc02 // Instructions-retired counter for RDINSTRET instruction. 
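(Aside, not part of the patch: a minimal sketch of how the CSR numbers defined above are meant to be consumed. It assumes a riscv64 build running on a hart that actually implements the V extension, since reading vlenb traps otherwise; the helper name read_vlenb is made up for the example.)

#include <cstdint>
#include <cstdio>

// csrr reads a control-and-status register by its address; 0xC22 is CSR_VLENB,
// the vector register length in bytes.
static inline uint64_t read_vlenb() {
  uint64_t value;
  __asm__ volatile("csrr %0, 0xC22" : "=r"(value));
  return value;
}

int main() {
  // VLEN in bits = vlenb * 8; only meaningful when the V extension is present.
  std::printf("VLEN = %llu bits\n", (unsigned long long)(read_vlenb() * 8));
  return 0;
}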
+ -+Register NativeInstruction::extract_rd(address instr) { -+ assert_cond(instr != NULL); -+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 11, 7)); -+} ++class VMRegImpl; ++typedef VMRegImpl* VMReg; + -+uint32_t NativeInstruction::extract_opcode(address instr) { -+ assert_cond(instr != NULL); -+ return Assembler::extract(((unsigned*)instr)[0], 6, 0); -+} ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; + -+uint32_t NativeInstruction::extract_funct3(address instr) { -+ assert_cond(instr != NULL); -+ return Assembler::extract(((unsigned*)instr)[0], 14, 12); ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; +} + -+bool NativeInstruction::is_pc_relative_at(address instr) { -+ // auipc + jalr -+ // auipc + addi -+ // auipc + load -+ // auipc + fload_load -+ return (is_auipc_at(instr)) && -+ (is_addi_at(instr + instruction_size) || -+ is_jalr_at(instr + instruction_size) || -+ is_load_at(instr + instruction_size) || -+ is_float_load_at(instr + instruction_size)) && -+ check_pc_relative_data_dependency(instr); -+} ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2, + -+// ie:ld(Rd, Label) -+bool NativeInstruction::is_load_pc_relative_at(address instr) { -+ return is_auipc_at(instr) && // auipc -+ is_ld_at(instr + instruction_size) && // ld -+ check_load_pc_relative_data_dependency(instr); -+} ++ // integer registers x8 - x15 and floating-point registers f8 - f15 are allocatable ++ // for compressed instructions. See Table 17.2 in spec. ++ compressed_register_base = 8, ++ compressed_register_top = 15, ++ }; + -+bool NativeInstruction::is_movptr_at(address instr) { -+ return is_lui_at(instr) && // Lui -+ is_addi_at(instr + instruction_size) && // Addi -+ is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 -+ is_addi_at(instr + instruction_size * 3) && // Addi -+ is_slli_shift_at(instr + instruction_size * 4, 5) && // Slli Rd, Rs, 5 -+ (is_addi_at(instr + instruction_size * 5) || -+ is_jalr_at(instr + instruction_size * 5) || -+ is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load -+ check_movptr_data_dependency(instr); -+} ++ // derived registers, offsets, and addresses ++ const Register successor() const { return as_Register(encoding() + 1); } + -+bool NativeInstruction::is_li32_at(address instr) { -+ return is_lui_at(instr) && // lui -+ is_addiw_at(instr + instruction_size) && // addiw -+ check_li32_data_dependency(instr); -+} ++ // construction ++ inline friend Register as_Register(int encoding); + -+bool NativeInstruction::is_li64_at(address instr) { -+ return is_lui_at(instr) && // lui -+ is_addi_at(instr + instruction_size) && // addi -+ is_slli_shift_at(instr + instruction_size * 2, 12) && // Slli Rd, Rs, 12 -+ is_addi_at(instr + instruction_size * 3) && // addi -+ is_slli_shift_at(instr + instruction_size * 4, 12) && // Slli Rd, Rs, 12 -+ is_addi_at(instr + instruction_size * 5) && // addi -+ is_slli_shift_at(instr + instruction_size * 6, 8) && // Slli Rd, Rs, 8 -+ is_addi_at(instr + instruction_size * 7) && // addi -+ check_li64_data_dependency(instr); -+} ++ VMReg as_VMReg() const; + -+void NativeCall::verify() { -+ assert(NativeCall::is_call_at((address)this), "unexpected code at call site"); -+} ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ bool is_valid() const { 
return (unsigned)encoding_nocheck() < number_of_registers; } ++ const char* name() const; + -+address NativeCall::destination() const { -+ address addr = (address)this; -+ assert(NativeInstruction::is_jal_at(instruction_address()), "inst must be jal."); -+ address destination = MacroAssembler::target_addr_for_insn(instruction_address()); ++ // for rvc ++ int compressed_encoding() const { ++ assert(is_compressed_valid(), "invalid compressed register"); ++ return encoding() - compressed_register_base; ++ } + -+ // Do we use a trampoline stub for this call? -+ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. -+ assert(cb && cb->is_nmethod(), "sanity"); -+ nmethod *nm = (nmethod *)cb; -+ if (nm != NULL && nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) { -+ // Yes we do, so get the destination from the trampoline stub. -+ const address trampoline_stub_addr = destination; -+ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ int compressed_encoding_nocheck() const { ++ return encoding_nocheck() - compressed_register_base; + } + -+ return destination; -+} ++ bool is_compressed_valid() const { ++ return encoding_nocheck() >= compressed_register_base && ++ encoding_nocheck() <= compressed_register_top; ++ } + -+// Similar to replace_mt_safe, but just changes the destination. The -+// important thing is that free-running threads are able to execute this -+// call instruction at all times. -+// -+// Used in the runtime linkage of calls; see class CompiledIC. -+// -+// Add parameter assert_lock to switch off assertion -+// during code generation, where no patching lock is needed. -+void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { -+ assert(!assert_lock || -+ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) || -+ CompiledICLocker::is_safe(addr_at(0)), -+ "concurrent code patching"); ++ // Return the bit which represents this register. This is intended ++ // to be ORed into a bitmask: for usage see class RegSet below. ++ uint64_t bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } ++}; + -+ ResourceMark rm; -+ address addr_call = addr_at(0); -+ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++// The integer registers of the RISCV architecture + -+ // Patch the constant in the call's trampoline stub. -+ address trampoline_stub_addr = get_trampoline(); -+ if (trampoline_stub_addr != NULL) { -+ assert (!is_NativeCallTrampolineStub_at(dest), "chained trampolines"); -+ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); -+ } ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + -+ // Patch the call. 
-+ if (Assembler::reachable_from_branch_at(addr_call, dest)) { -+ set_destination(dest); -+ } else { -+ assert (trampoline_stub_addr != NULL, "we need a trampoline"); -+ set_destination(trampoline_stub_addr); -+ } ++CONSTANT_REGISTER_DECLARATION(Register, x0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, x1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, x2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, x3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, x4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, x5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, x6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, x7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, x8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, x9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, x10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, x11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, x12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, x13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, x14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, x15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, x16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, x17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, x18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, x19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, x20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, x21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, x22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, x23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, x24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, x25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, x26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, x27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, x28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, x29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, x30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, x31, (31)); + -+ ICache::invalidate_range(addr_call, instruction_size); -+} ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; + -+address NativeCall::get_trampoline() { -+ address call_addr = addr_at(0); ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} + -+ CodeBlob *code = CodeCache::find_blob(call_addr); -+ assert(code != NULL, "Could not find the containing code blob"); ++// The implementation of floating point registers for the architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2, + -+ address jal_destination = MacroAssembler::pd_call_destination(call_addr); -+ if (code != NULL && code->contains(jal_destination) && is_NativeCallTrampolineStub_at(jal_destination)) { -+ return jal_destination; -+ } ++ // float registers in the range of [f8~f15] correspond to RVC. Please see Table 16.2 in spec. 
++ compressed_register_base = 8, ++ compressed_register_top = 15, ++ }; + -+ if (code != NULL && code->is_nmethod()) { -+ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); -+ } ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); + -+ return NULL; -+} ++ VMReg as_VMReg() const; + -+// Inserts a native call instruction at a given pc -+void NativeCall::insert(address code_pos, address entry) { Unimplemented(); } ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + -+//------------------------------------------------------------------- ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } ++ const char* name() const; + -+void NativeMovConstReg::verify() { -+ if (!(nativeInstruction_at(instruction_address())->is_movptr() || -+ is_auipc_at(instruction_address()))) { -+ fatal("should be MOVPTR or AUIPC"); ++ // for rvc ++ int compressed_encoding() const { ++ assert(is_compressed_valid(), "invalid compressed register"); ++ return encoding() - compressed_register_base; + } -+} + -+intptr_t NativeMovConstReg::data() const { -+ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); -+ if (maybe_cpool_ref(instruction_address())) { -+ return *(intptr_t*)addr; -+ } else { -+ return (intptr_t)addr; ++ int compressed_encoding_nocheck() const { ++ return encoding_nocheck() - compressed_register_base; + } -+} + -+void NativeMovConstReg::set_data(intptr_t x) { -+ if (maybe_cpool_ref(instruction_address())) { -+ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); -+ *(intptr_t*)addr = x; -+ } else { -+ // Store x into the instruction stream. -+ MacroAssembler::pd_patch_instruction_size(instruction_address(), (address)x); -+ ICache::invalidate_range(instruction_address(), movptr_instruction_size); ++ bool is_compressed_valid() const { ++ return encoding_nocheck() >= compressed_register_base && ++ encoding_nocheck() <= compressed_register_top; + } ++}; + -+ // Find and replace the oop/metadata corresponding to this -+ // instruction in oops section. 
-+ CodeBlob* cb = CodeCache::find_blob(instruction_address()); -+ nmethod* nm = cb->as_nmethod_or_null(); -+ if (nm != NULL) { -+ RelocIterator iter(nm, instruction_address(), next_instruction_address()); -+ while (iter.next()) { -+ if (iter.type() == relocInfo::oop_type) { -+ oop* oop_addr = iter.oop_reloc()->oop_addr(); -+ *oop_addr = cast_to_oop(x); -+ break; -+ } else if (iter.type() == relocInfo::metadata_type) { -+ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); -+ *metadata_addr = (Metadata*)x; -+ break; -+ } -+ } -+ } -+} ++// The float registers of the RISCV architecture + -+void NativeMovConstReg::print() { -+ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, -+ p2i(instruction_address()), data()); -+} ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); + -+//------------------------------------------------------------------- ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); + -+int NativeMovRegMem::offset() const { -+ Unimplemented(); -+ return 0; ++// Use VectorRegister as shortcut ++class VectorRegisterImpl; ++typedef VectorRegisterImpl* VectorRegister; ++ ++inline VectorRegister as_VectorRegister(int encoding) { ++ return (VectorRegister)(intptr_t) encoding; +} + -+void NativeMovRegMem::set_offset(int x) { Unimplemented(); } ++// The implementation of vector registers for RVV ++class VectorRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 4 ++ }; + -+void NativeMovRegMem::verify() { -+ Unimplemented(); -+} ++ // construction ++ inline friend VectorRegister as_VectorRegister(int encoding); + 
-+//-------------------------------------------------------------------------------- ++ VMReg as_VMReg() const; + -+void NativeJump::verify() { } ++ // derived registers, offsets, and addresses ++ VectorRegister successor() const { return as_VectorRegister(encoding() + 1); } + ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } ++ const char* name() const; + -+void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { -+} ++}; + ++// The vector registers of RVV ++CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1)); + -+address NativeJump::jump_destination() const { -+ address dest = MacroAssembler::target_addr_for_insn(instruction_address()); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v10 , (10)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v11 , (11)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v12 , (12)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v13 , (13)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v14 , (14)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v15 , (15)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v16 , (16)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v17 , (17)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v18 , (18)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v19 , (19)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v20 , (20)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v21 , (21)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v22 , (22)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v23 , (23)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v24 , (24)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v25 , (25)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v26 , (26)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v27 , (27)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v28 , (28)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v29 , (29)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v30 , (30)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v31 , (31)); + -+ // We use jump to self as the unresolved address which the inline -+ // cache code (and relocs) know about -+ // As a special case we also use sequence movptr_with_offset(r,0), jalr(r,0) -+ // i.e. jump to 0 when we need leave space for a wide immediate -+ // load + -+ // return -1 if jump to self or to 0 -+ if ((dest == (address) this) || dest == 0) { -+ dest = (address) -1; -+ } ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. 
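(Editor's note, illustrative only, not part of the patch: assuming RegisterImpl, like FloatRegisterImpl above, declares 32 registers with max_slots_per_register = 2, the slot count computed by the class just below works out to

    number_of_registers = 2 * 32 (GPR slots) + 2 * 32 (FPR slots) = 128

which is the bound that C2's REG_COUNT must stay within, as the in-code comment notes.)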
++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. + -+ return dest; ++ number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers) ++ }; ++ ++ // added to make it compile ++ static const int max_gpr; ++ static const int max_fpr; +}; + -+void NativeJump::set_jump_destination(address dest) { -+ // We use jump to self as the unresolved address which the inline -+ // cache code (and relocs) know about -+ if (dest == (address) -1) -+ dest = instruction_address(); ++// A set of registers ++class RegSet { ++ uint32_t _bitset; + -+ MacroAssembler::pd_patch_instruction(instruction_address(), dest); -+ ICache::invalidate_range(instruction_address(), instruction_size); -+} ++ RegSet(uint32_t bitset) : _bitset(bitset) { } + -+//------------------------------------------------------------------- ++public: + -+address NativeGeneralJump::jump_destination() const { -+ NativeMovConstReg* move = nativeMovConstReg_at(instruction_address()); -+ address dest = (address) move->data(); ++ RegSet() : _bitset(0) { } + -+ // We use jump to self as the unresolved address which the inline -+ // cache code (and relocs) know about -+ // As a special case we also use jump to 0 when first generating -+ // a general jump ++ RegSet(Register r1) : _bitset(r1->bit()) { } + -+ // return -1 if jump to self or to 0 -+ if ((dest == (address) this) || dest == 0) { -+ dest = (address) -1; ++ RegSet operator+(const RegSet aSet) const { ++ RegSet result(_bitset | aSet._bitset); ++ return result; + } + -+ return dest; -+} ++ RegSet operator-(const RegSet aSet) const { ++ RegSet result(_bitset & ~aSet._bitset); ++ return result; ++ } + -+//------------------------------------------------------------------- ++ RegSet &operator+=(const RegSet aSet) { ++ *this = *this + aSet; ++ return *this; ++ } + -+bool NativeInstruction::is_safepoint_poll() { -+ return is_lwu_to_zr(address(this)); -+} ++ RegSet &operator-=(const RegSet aSet) { ++ *this = *this - aSet; ++ return *this; ++ } + -+bool NativeInstruction::is_lwu_to_zr(address instr) { -+ assert_cond(instr != NULL); -+ return (extract_opcode(instr) == 0b0000011 && -+ extract_funct3(instr) == 0b110 && -+ extract_rd(instr) == zr); // zr -+} -+ -+// A 16-bit instruction with all bits ones is permanently reserved as an illegal instruction. 
-+bool NativeInstruction::is_sigill_zombie_not_entrant() { -+ // jvmci -+ return uint_at(0) == 0xffffffff; -+} -+ -+void NativeIllegalInstruction::insert(address code_pos) { -+ assert_cond(code_pos != NULL); -+ *(juint*)code_pos = 0xffffffff; // all bits ones is permanently reserved as an illegal instruction -+} ++ static RegSet of(Register r1) { ++ return RegSet(r1); ++ } + -+bool NativeInstruction::is_stop() { -+ return uint_at(0) == 0xffffffff; // an illegal instruction -+} ++ static RegSet of(Register r1, Register r2) { ++ return of(r1) + r2; ++ } + -+//------------------------------------------------------------------- ++ static RegSet of(Register r1, Register r2, Register r3) { ++ return of(r1, r2) + r3; ++ } + -+// MT-safe inserting of a jump over a jump or a nop (used by -+// nmethod::make_not_entrant_or_zombie) ++ static RegSet of(Register r1, Register r2, Register r3, Register r4) { ++ return of(r1, r2, r3) + r4; ++ } + -+void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ static RegSet range(Register start, Register end) { ++ uint32_t bits = ~0; ++ bits <<= start->encoding(); ++ bits <<= 31 - end->encoding(); ++ bits >>= 31 - end->encoding(); + -+ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ return RegSet(bits); ++ } + -+ assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() || -+ nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(), -+ "riscv cannot replace non-jump with jump"); ++ uint32_t bits() const { return _bitset; } + -+ // Patch this nmethod atomically. -+ if (Assembler::reachable_from_branch_at(verified_entry, dest)) { -+ ptrdiff_t offset = dest - verified_entry; -+ guarantee(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction."); // 1M ++private: + -+ uint32_t insn = 0; -+ address pInsn = (address)&insn; -+ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); -+ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); -+ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); -+ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); -+ Assembler::patch(pInsn, 11, 7, 0); // zero, no link jump -+ Assembler::patch(pInsn, 6, 0, 0b1101111); // j, (jal x0 offset) -+ *(unsigned int*)verified_entry = insn; -+ } else { -+ // We use an illegal instruction for marking a method as -+ // not_entrant or zombie. -+ NativeIllegalInstruction::insert(verified_entry); ++ Register first() { ++ uint32_t first = _bitset & -_bitset; ++ return first ? as_Register(exact_log2(first)) : noreg; + } ++}; + -+ ICache::invalidate_range(verified_entry, instruction_size); -+} ++#endif // CPU_RISCV_REGISTER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +new file mode 100644 +index 0000000000..047ea2276c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +@@ -0,0 +1,112 @@ ++/* ++ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { -+ CodeBuffer cb(code_pos, instruction_size); -+ MacroAssembler a(&cb); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" + -+ int32_t offset = 0; -+ a.movptr_with_offset(t0, entry, offset); // lui, addi, slli, addi, slli -+ a.jalr(x0, t0, offset); // jalr ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ if (verify_only) { ++ return; ++ } + -+ ICache::invalidate_range(code_pos, instruction_size); -+} ++ int bytes; + -+// MT-safe patching of a long jump instruction. -+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { -+ ShouldNotCallThis(); ++ switch (type()) { ++ case relocInfo::oop_type: { ++ oop_Relocation *reloc = (oop_Relocation *)this; ++ if (NativeInstruction::is_load_pc_relative_at(addr())) { ++ address constptr = (address)code()->oop_addr_at(reloc->oop_index()); ++ bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); ++ assert(*(address*)constptr == x, "error in oop relocation"); ++ } else { ++ bytes = MacroAssembler::patch_oop(addr(), x); ++ } ++ break; ++ } ++ default: ++ bytes = MacroAssembler::pd_patch_instruction_size(addr(), x); ++ break; ++ } ++ ICache::invalidate_range(addr(), bytes); +} + -+ -+address NativeCallTrampolineStub::destination(nmethod *nm) const { -+ return ptr_at(data_offset); ++address Relocation::pd_call_destination(address orig_addr) { ++ assert(is_call(), "should be an address instruction here"); ++ if (NativeCall::is_call_at(addr())) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline != NULL) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } ++ } ++ if (orig_addr != NULL) { ++ // the extracted address from the instructions in address orig_addr ++ address new_addr = MacroAssembler::pd_call_destination(orig_addr); ++ // If call is branch to self, don't try to relocate it, just leave it ++ // as branch to self. This happens during code generation if the code ++ // buffer expands. It will be relocated to the trampoline above once ++ // code generation is complete. ++ new_addr = (new_addr == orig_addr) ? 
addr() : new_addr; ++ return new_addr; ++ } ++ return MacroAssembler::pd_call_destination(addr()); +} + -+void NativeCallTrampolineStub::set_destination(address new_destination) { -+ set_ptr_at(data_offset, new_destination); -+ OrderAccess::release(); ++void Relocation::pd_set_call_destination(address x) { ++ assert(is_call(), "should be an address instruction here"); ++ if (NativeCall::is_call_at(addr())) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline != NULL) { ++ nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false); ++ return; ++ } ++ } ++ MacroAssembler::pd_patch_instruction_size(addr(), x); ++ address pd_call = pd_call_destination(addr()); ++ assert(pd_call == x, "fail in reloc"); +} + -+uint32_t NativeMembar::get_kind() { -+ uint32_t insn = uint_at(0); -+ -+ uint32_t predecessor = Assembler::extract(insn, 27, 24); -+ uint32_t successor = Assembler::extract(insn, 23, 20); -+ -+ return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor); ++address* Relocation::pd_address_in_code() { ++ assert(NativeCall::is_load_pc_relative_at(addr()), "Not the expected instruction sequence!"); ++ return (address*)(MacroAssembler::target_addr_for_insn(addr())); +} + -+void NativeMembar::set_kind(uint32_t order_kind) { -+ uint32_t predecessor = 0; -+ uint32_t successor = 0; -+ -+ MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor); ++address Relocation::pd_get_address_from_code() { ++ return MacroAssembler::pd_call_destination(addr()); ++} + -+ uint32_t insn = uint_at(0); -+ address pInsn = (address) &insn; -+ Assembler::patch(pInsn, 27, 24, predecessor); -+ Assembler::patch(pInsn, 23, 20, successor); ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++ if (NativeInstruction::maybe_cpool_ref(addr())) { ++ address old_addr = old_addr_for(addr(), src, dest); ++ MacroAssembler::pd_patch_instruction_size(addr(), MacroAssembler::target_addr_for_insn(old_addr)); ++ } ++} + -+ address membar = addr_at(0); -+ *(unsigned int*) membar = insn; ++void metadata_Relocation::pd_fix_value(address x) { +} -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp new file mode 100644 -index 00000000000..718b2e3de6c +index 0000000000..840ed935d8 --- /dev/null -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp -@@ -0,0 +1,572 @@ ++++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp +@@ -0,0 +1,44 @@ +/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -28612,7635 +27617,6143 @@ index 00000000000..718b2e3de6c + * + */ + -+#ifndef CPU_RISCV_NATIVEINST_RISCV_HPP -+#define CPU_RISCV_NATIVEINST_RISCV_HPP -+ -+#include "asm/assembler.hpp" -+#include "runtime/icache.hpp" -+#include "runtime/os.hpp" -+ -+// We have interfaces for the following instructions: -+// - NativeInstruction -+// - - NativeCall -+// - - NativeMovConstReg -+// - - NativeMovRegMem -+// - - NativeJump -+// - - NativeGeneralJump -+// - - NativeIllegalInstruction -+// - - NativeCallTrampolineStub -+// - - NativeMembar -+// - - NativeFenceI -+ -+// The base class for different kinds of native instruction abstractions. -+// Provides the primitive operations to manipulate code relative to this. -+ -+class NativeCall; ++#ifndef CPU_RISCV_RELOCINFO_RISCV_HPP ++#define CPU_RISCV_RELOCINFO_RISCV_HPP + -+class NativeInstruction { -+ friend class Relocation; -+ friend bool is_NativeCallTrampolineStub_at(address); -+ public: ++ // machine-dependent parts of class relocInfo ++ private: + enum { -+ instruction_size = 4, -+ compressed_instruction_size = 2, ++ // Relocations are byte-aligned. ++ offset_unit = 1, ++ // Must be at least 1 for RelocInfo::narrow_oop_in_const. ++ format_width = 1 + }; + -+ juint encoding() const { -+ return uint_at(0); -+ } ++ public: + -+ bool is_jal() const { return is_jal_at(addr_at(0)); } -+ bool is_movptr() const { return is_movptr_at(addr_at(0)); } -+ bool is_call() const { return is_call_at(addr_at(0)); } -+ bool is_jump() const { return is_jump_at(addr_at(0)); } ++ // This platform has no oops in the code that are not also ++ // listed in the oop section. ++ static bool mustIterateImmediateOopsInCode() { return false; } + -+ static bool is_jal_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1101111; } -+ static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; } -+ static bool is_branch_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100011; } -+ static bool is_ld_at(address instr) { assert_cond(instr != NULL); return is_load_at(instr) && extract_funct3(instr) == 0b011; } -+ static bool is_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000011; } -+ static bool is_float_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000111; } -+ static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010111; } -+ static bool is_jump_at(address instr) { assert_cond(instr != NULL); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); } -+ static bool is_addi_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; } -+ static bool is_addiw_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; } -+ static bool is_lui_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0110111; } -+ static bool is_slli_shift_at(address instr, uint32_t shift) { -+ assert_cond(instr != NULL); -+ return (extract_opcode(instr) == 0b0010011 && // opcode field -+ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation -+ Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field -+ } ++#endif // 
CPU_RISCV_RELOCINFO_RISCV_HPP
+diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
+new file mode 100644
+index 0000000000..02d6167629
+--- /dev/null
++++ b/src/hotspot/cpu/riscv/riscv.ad
+@@ -0,0 +1,10280 @@
++//
++// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
++// Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
++// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++//
++// This code is free software; you can redistribute it and/or modify it
++// under the terms of the GNU General Public License version 2 only, as
++// published by the Free Software Foundation.
++//
++// This code is distributed in the hope that it will be useful, but WITHOUT
++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++// version 2 for more details (a copy is included in the LICENSE file that
++// accompanied this code).
++//
++// You should have received a copy of the GNU General Public License version
++// 2 along with this work; if not, write to the Free Software Foundation,
++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++//
++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++// or visit www.oracle.com if you need additional information or have any
++// questions.
++//
++//
+
++// RISCV Architecture Description File
+
++//----------REGISTER DEFINITION BLOCK------------------------------------------
++// This information is used by the matcher and the register allocator to
++// describe individual registers and classes of registers within the target
++// architecture.
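(Editor's note, illustrative only, not part of the patch: as a quick orientation for the block that follows, each 64-bit general register is described to ADLC as a real low half plus a virtual high half, and the two halves are then grouped into an allocatable class. Using x10 as the example, the definitions further down in this file read:

    reg_def R10   ( SOC, SOC, Op_RegI, 10, x10->as_VMReg() );         // low 32-bit half of x10
    reg_def R10_H ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()->next());  // virtual high half
    reg_class r10_reg( R10, R10_H );                                   // full 64-bit x10 as one allocatable class

The save-type legend immediately below explains the NS/SOC/SOE/AS columns.)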
+ -+ // the instruction sequence of li64 is as below: -+ // lui -+ // addi -+ // slli -+ // addi -+ // slli -+ // addi -+ // slli -+ // addi -+ static bool check_li64_data_dependency(address instr) { -+ address lui = instr; -+ address addi1 = lui + instruction_size; -+ address slli1 = addi1 + instruction_size; -+ address addi2 = slli1 + instruction_size; -+ address slli2 = addi2 + instruction_size; -+ address addi3 = slli2 + instruction_size; -+ address slli3 = addi3 + instruction_size; -+ address addi4 = slli3 + instruction_size; -+ return extract_rs1(addi1) == extract_rd(lui) && -+ extract_rs1(addi1) == extract_rd(addi1) && -+ extract_rs1(slli1) == extract_rd(addi1) && -+ extract_rs1(slli1) == extract_rd(slli1) && -+ extract_rs1(addi2) == extract_rd(slli1) && -+ extract_rs1(addi2) == extract_rd(addi2) && -+ extract_rs1(slli2) == extract_rd(addi2) && -+ extract_rs1(slli2) == extract_rd(slli2) && -+ extract_rs1(addi3) == extract_rd(slli2) && -+ extract_rs1(addi3) == extract_rd(addi3) && -+ extract_rs1(slli3) == extract_rd(addi3) && -+ extract_rs1(slli3) == extract_rd(slli3) && -+ extract_rs1(addi4) == extract_rd(slli3) && -+ extract_rs1(addi4) == extract_rd(addi4); -+ } ++register %{ ++//----------Architecture Description Register Definitions---------------------- ++// General Registers ++// "reg_def" name ( register save type, C convention save type, ++// ideal register type, encoding ); ++// Register Save Types: ++// ++// NS = No-Save: The register allocator assumes that these registers ++// can be used without saving upon entry to the method, & ++// that they do not need to be saved at call sites. ++// ++// SOC = Save-On-Call: The register allocator assumes that these registers ++// can be used without saving upon entry to the method, ++// but that they must be saved at call sites. ++// ++// SOE = Save-On-Entry: The register allocator assumes that these registers ++// must be saved before using them upon entry to the ++// method, but they do not need to be saved at call ++// sites. ++// ++// AS = Always-Save: The register allocator assumes that these registers ++// must be saved before using them upon entry to the ++// method, & that they must be saved at call sites. ++// ++// Ideal Register Type is used to determine how to save & restore a ++// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get ++// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. ++// ++// The encoding number is the actual bit-pattern placed into the opcodes. + -+ // the instruction sequence of li32 is as below: -+ // lui -+ // addiw -+ static bool check_li32_data_dependency(address instr) { -+ address lui = instr; -+ address addiw = lui + instruction_size; ++// We must define the 64 bit int registers in two 32 bit halves, the ++// real lower register and a virtual upper half register. upper halves ++// are used by the register allocator but are not actually supplied as ++// operands to memory ops. ++// ++// follow the C1 compiler in making registers ++// ++// x7, x9-x17, x27-x31 volatile (caller save) ++// x0-x4, x8, x23 system (no save, no allocate) ++// x5-x6 non-allocatable (so we can use them as temporary regs) + -+ return extract_rs1(addiw) == extract_rd(lui) && -+ extract_rs1(addiw) == extract_rd(addiw); -+ } ++// ++// as regards Java usage. 
we don't use any callee save registers ++// because this makes it difficult to de-optimise a frame (see comment ++// in x86 implementation of Deoptimization::unwind_callee_save_values) ++// + -+ // the instruction sequence of pc-relative is as below: -+ // auipc -+ // jalr/addi/load/float_load -+ static bool check_pc_relative_data_dependency(address instr) { -+ address auipc = instr; -+ address last_instr = auipc + instruction_size; ++// General Registers + -+ return extract_rs1(last_instr) == extract_rd(auipc); -+ } ++reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr ++reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() ); ++reg_def R1 ( NS, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra ++reg_def R1_H ( NS, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); ++reg_def R2 ( NS, SOE, Op_RegI, 2, x2->as_VMReg() ); // sp ++reg_def R2_H ( NS, SOE, Op_RegI, 2, x2->as_VMReg()->next() ); ++reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp ++reg_def R3_H ( NS, NS, Op_RegI, 3, x3->as_VMReg()->next() ); ++reg_def R4 ( NS, NS, Op_RegI, 4, x4->as_VMReg() ); // tp ++reg_def R4_H ( NS, NS, Op_RegI, 4, x4->as_VMReg()->next() ); ++reg_def R7 ( SOC, SOC, Op_RegI, 7, x7->as_VMReg() ); ++reg_def R7_H ( SOC, SOC, Op_RegI, 7, x7->as_VMReg()->next() ); ++reg_def R8 ( NS, SOE, Op_RegI, 8, x8->as_VMReg() ); // fp ++reg_def R8_H ( NS, SOE, Op_RegI, 8, x8->as_VMReg()->next() ); ++reg_def R9 ( SOC, SOE, Op_RegI, 9, x9->as_VMReg() ); ++reg_def R9_H ( SOC, SOE, Op_RegI, 9, x9->as_VMReg()->next() ); ++reg_def R10 ( SOC, SOC, Op_RegI, 10, x10->as_VMReg() ); ++reg_def R10_H ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()->next()); ++reg_def R11 ( SOC, SOC, Op_RegI, 11, x11->as_VMReg() ); ++reg_def R11_H ( SOC, SOC, Op_RegI, 11, x11->as_VMReg()->next()); ++reg_def R12 ( SOC, SOC, Op_RegI, 12, x12->as_VMReg() ); ++reg_def R12_H ( SOC, SOC, Op_RegI, 12, x12->as_VMReg()->next()); ++reg_def R13 ( SOC, SOC, Op_RegI, 13, x13->as_VMReg() ); ++reg_def R13_H ( SOC, SOC, Op_RegI, 13, x13->as_VMReg()->next()); ++reg_def R14 ( SOC, SOC, Op_RegI, 14, x14->as_VMReg() ); ++reg_def R14_H ( SOC, SOC, Op_RegI, 14, x14->as_VMReg()->next()); ++reg_def R15 ( SOC, SOC, Op_RegI, 15, x15->as_VMReg() ); ++reg_def R15_H ( SOC, SOC, Op_RegI, 15, x15->as_VMReg()->next()); ++reg_def R16 ( SOC, SOC, Op_RegI, 16, x16->as_VMReg() ); ++reg_def R16_H ( SOC, SOC, Op_RegI, 16, x16->as_VMReg()->next()); ++reg_def R17 ( SOC, SOC, Op_RegI, 17, x17->as_VMReg() ); ++reg_def R17_H ( SOC, SOC, Op_RegI, 17, x17->as_VMReg()->next()); ++reg_def R18 ( SOC, SOE, Op_RegI, 18, x18->as_VMReg() ); ++reg_def R18_H ( SOC, SOE, Op_RegI, 18, x18->as_VMReg()->next()); ++reg_def R19 ( SOC, SOE, Op_RegI, 19, x19->as_VMReg() ); ++reg_def R19_H ( SOC, SOE, Op_RegI, 19, x19->as_VMReg()->next()); ++reg_def R20 ( SOC, SOE, Op_RegI, 20, x20->as_VMReg() ); // caller esp ++reg_def R20_H ( SOC, SOE, Op_RegI, 20, x20->as_VMReg()->next()); ++reg_def R21 ( SOC, SOE, Op_RegI, 21, x21->as_VMReg() ); ++reg_def R21_H ( SOC, SOE, Op_RegI, 21, x21->as_VMReg()->next()); ++reg_def R22 ( SOC, SOE, Op_RegI, 22, x22->as_VMReg() ); ++reg_def R22_H ( SOC, SOE, Op_RegI, 22, x22->as_VMReg()->next()); ++reg_def R23 ( NS, SOE, Op_RegI, 23, x23->as_VMReg() ); // java thread ++reg_def R23_H ( NS, SOE, Op_RegI, 23, x23->as_VMReg()->next()); ++reg_def R24 ( SOC, SOE, Op_RegI, 24, x24->as_VMReg() ); ++reg_def R24_H ( SOC, SOE, Op_RegI, 24, x24->as_VMReg()->next()); ++reg_def R25 ( SOC, SOE, Op_RegI, 25, x25->as_VMReg() ); ++reg_def R25_H ( SOC, SOE, Op_RegI, 25, x25->as_VMReg()->next()); ++reg_def R26 ( SOC, SOE, 
Op_RegI, 26, x26->as_VMReg() ); ++reg_def R26_H ( SOC, SOE, Op_RegI, 26, x26->as_VMReg()->next()); ++reg_def R27 ( SOC, SOE, Op_RegI, 27, x27->as_VMReg() ); // heapbase ++reg_def R27_H ( SOC, SOE, Op_RegI, 27, x27->as_VMReg()->next()); ++reg_def R28 ( SOC, SOC, Op_RegI, 28, x28->as_VMReg() ); ++reg_def R28_H ( SOC, SOC, Op_RegI, 28, x28->as_VMReg()->next()); ++reg_def R29 ( SOC, SOC, Op_RegI, 29, x29->as_VMReg() ); ++reg_def R29_H ( SOC, SOC, Op_RegI, 29, x29->as_VMReg()->next()); ++reg_def R30 ( SOC, SOC, Op_RegI, 30, x30->as_VMReg() ); ++reg_def R30_H ( SOC, SOC, Op_RegI, 30, x30->as_VMReg()->next()); ++reg_def R31 ( SOC, SOC, Op_RegI, 31, x31->as_VMReg() ); ++reg_def R31_H ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()->next()); + -+ // the instruction sequence of load_label is as below: -+ // auipc -+ // load -+ static bool check_load_pc_relative_data_dependency(address instr) { -+ address auipc = instr; -+ address load = auipc + instruction_size; ++// ---------------------------- ++// Float/Double Registers ++// ---------------------------- + -+ return extract_rd(load) == extract_rd(auipc) && -+ extract_rs1(load) == extract_rd(load); -+ } ++// Double Registers + -+ static bool is_movptr_at(address instr); -+ static bool is_li32_at(address instr); -+ static bool is_li64_at(address instr); -+ static bool is_pc_relative_at(address branch); -+ static bool is_load_pc_relative_at(address branch); ++// The rules of ADL require that double registers be defined in pairs. ++// Each pair must be two 32-bit values, but not necessarily a pair of ++// single float registers. In each pair, ADLC-assigned register numbers ++// must be adjacent, with the lower number even. Finally, when the ++// CPU stores such a register pair to memory, the word associated with ++// the lower ADLC-assigned number must be stored to the lower address. + -+ static bool is_call_at(address instr) { -+ if (is_jal_at(instr) || is_jalr_at(instr)) { -+ return true; -+ } -+ return false; -+ } -+ static bool is_lwu_to_zr(address instr); ++// RISCV has 32 floating-point registers. Each can store a single ++// or double precision floating-point value. + -+ inline bool is_nop(); -+ inline bool is_jump_or_nop(); -+ bool is_safepoint_poll(); -+ bool is_sigill_zombie_not_entrant(); -+ bool is_stop(); ++// for Java use float registers f0-f31 are always save on call whereas ++// the platform ABI treats f8-f9 and f18-f27 as callee save). 
Other ++// float registers are SOC as per the platform spec + -+ protected: -+ address addr_at(int offset) const { return address(this) + offset; } ++reg_def F0 ( SOC, SOC, Op_RegF, 0, f0->as_VMReg() ); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()->next() ); ++reg_def F1 ( SOC, SOC, Op_RegF, 1, f1->as_VMReg() ); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()->next() ); ++reg_def F2 ( SOC, SOC, Op_RegF, 2, f2->as_VMReg() ); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()->next() ); ++reg_def F3 ( SOC, SOC, Op_RegF, 3, f3->as_VMReg() ); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()->next() ); ++reg_def F4 ( SOC, SOC, Op_RegF, 4, f4->as_VMReg() ); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()->next() ); ++reg_def F5 ( SOC, SOC, Op_RegF, 5, f5->as_VMReg() ); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()->next() ); ++reg_def F6 ( SOC, SOC, Op_RegF, 6, f6->as_VMReg() ); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()->next() ); ++reg_def F7 ( SOC, SOC, Op_RegF, 7, f7->as_VMReg() ); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()->next() ); ++reg_def F8 ( SOC, SOE, Op_RegF, 8, f8->as_VMReg() ); ++reg_def F8_H ( SOC, SOE, Op_RegF, 8, f8->as_VMReg()->next() ); ++reg_def F9 ( SOC, SOE, Op_RegF, 9, f9->as_VMReg() ); ++reg_def F9_H ( SOC, SOE, Op_RegF, 9, f9->as_VMReg()->next() ); ++reg_def F10 ( SOC, SOC, Op_RegF, 10, f10->as_VMReg() ); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, f10->as_VMReg()->next() ); ++reg_def F11 ( SOC, SOC, Op_RegF, 11, f11->as_VMReg() ); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, f11->as_VMReg()->next() ); ++reg_def F12 ( SOC, SOC, Op_RegF, 12, f12->as_VMReg() ); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, f12->as_VMReg()->next() ); ++reg_def F13 ( SOC, SOC, Op_RegF, 13, f13->as_VMReg() ); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, f13->as_VMReg()->next() ); ++reg_def F14 ( SOC, SOC, Op_RegF, 14, f14->as_VMReg() ); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, f14->as_VMReg()->next() ); ++reg_def F15 ( SOC, SOC, Op_RegF, 15, f15->as_VMReg() ); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, f15->as_VMReg()->next() ); ++reg_def F16 ( SOC, SOC, Op_RegF, 16, f16->as_VMReg() ); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, f16->as_VMReg()->next() ); ++reg_def F17 ( SOC, SOC, Op_RegF, 17, f17->as_VMReg() ); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, f17->as_VMReg()->next() ); ++reg_def F18 ( SOC, SOE, Op_RegF, 18, f18->as_VMReg() ); ++reg_def F18_H ( SOC, SOE, Op_RegF, 18, f18->as_VMReg()->next() ); ++reg_def F19 ( SOC, SOE, Op_RegF, 19, f19->as_VMReg() ); ++reg_def F19_H ( SOC, SOE, Op_RegF, 19, f19->as_VMReg()->next() ); ++reg_def F20 ( SOC, SOE, Op_RegF, 20, f20->as_VMReg() ); ++reg_def F20_H ( SOC, SOE, Op_RegF, 20, f20->as_VMReg()->next() ); ++reg_def F21 ( SOC, SOE, Op_RegF, 21, f21->as_VMReg() ); ++reg_def F21_H ( SOC, SOE, Op_RegF, 21, f21->as_VMReg()->next() ); ++reg_def F22 ( SOC, SOE, Op_RegF, 22, f22->as_VMReg() ); ++reg_def F22_H ( SOC, SOE, Op_RegF, 22, f22->as_VMReg()->next() ); ++reg_def F23 ( SOC, SOE, Op_RegF, 23, f23->as_VMReg() ); ++reg_def F23_H ( SOC, SOE, Op_RegF, 23, f23->as_VMReg()->next() ); ++reg_def F24 ( SOC, SOE, Op_RegF, 24, f24->as_VMReg() ); ++reg_def F24_H ( SOC, SOE, Op_RegF, 24, f24->as_VMReg()->next() ); ++reg_def F25 ( SOC, SOE, Op_RegF, 25, f25->as_VMReg() ); ++reg_def F25_H ( SOC, SOE, Op_RegF, 25, f25->as_VMReg()->next() ); ++reg_def F26 ( SOC, SOE, Op_RegF, 26, f26->as_VMReg() ); ++reg_def F26_H ( SOC, SOE, Op_RegF, 26, f26->as_VMReg()->next() ); ++reg_def F27 ( SOC, SOE, Op_RegF, 27, f27->as_VMReg() ); 
++reg_def F27_H ( SOC, SOE, Op_RegF, 27, f27->as_VMReg()->next() ); ++reg_def F28 ( SOC, SOC, Op_RegF, 28, f28->as_VMReg() ); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, f28->as_VMReg()->next() ); ++reg_def F29 ( SOC, SOC, Op_RegF, 29, f29->as_VMReg() ); ++reg_def F29_H ( SOC, SOC, Op_RegF, 29, f29->as_VMReg()->next() ); ++reg_def F30 ( SOC, SOC, Op_RegF, 30, f30->as_VMReg() ); ++reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() ); ++reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() ); ++reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); + -+ jint int_at(int offset) const { return *(jint*) addr_at(offset); } -+ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++// ---------------------------- ++// Special Registers ++// ---------------------------- + -+ address ptr_at(int offset) const { return *(address*) addr_at(offset); } ++// On riscv, the physical flag register is missing, so we use t1 instead, ++// to bridge the RegFlag semantics in share/opto + -+ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++reg_def RFLAGS (SOC, SOC, Op_RegFlags, 6, x6->as_VMReg() ); + ++// Specify priority of register selection within phases of register ++// allocation. Highest priority is first. A useful heuristic is to ++// give registers a low priority when they are required by machine ++// instructions, like EAX and EDX on I486, and choose no-save registers ++// before save-on-call, & save-on-call before save-on-entry. Registers ++// which participate in fixed calling sequences should come last. ++// Registers which are used as pairs must fall on an even boundary. + -+ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; } -+ void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; } -+ void set_ptr_at (int offset, address ptr) { *(address*) addr_at(offset) = ptr; } -+ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; } ++alloc_class chunk0( ++ // volatiles ++ R7, R7_H, ++ R28, R28_H, ++ R29, R29_H, ++ R30, R30_H, ++ R31, R31_H, + -+ public: ++ // arg registers ++ R10, R10_H, ++ R11, R11_H, ++ R12, R12_H, ++ R13, R13_H, ++ R14, R14_H, ++ R15, R15_H, ++ R16, R16_H, ++ R17, R17_H, + -+ inline friend NativeInstruction* nativeInstruction_at(address addr); ++ // non-volatiles ++ R9, R9_H, ++ R18, R18_H, ++ R19, R19_H, ++ R20, R20_H, ++ R21, R21_H, ++ R22, R22_H, ++ R24, R24_H, ++ R25, R25_H, ++ R26, R26_H, + -+ static bool maybe_cpool_ref(address instr) { -+ return is_auipc_at(instr); -+ } ++ // non-allocatable registers ++ R23, R23_H, // java thread ++ R27, R27_H, // heapbase ++ R4, R4_H, // thread ++ R8, R8_H, // fp ++ R0, R0_H, // zero ++ R1, R1_H, // ra ++ R2, R2_H, // sp ++ R3, R3_H, // gp ++); + -+ bool is_membar() { -+ return (uint_at(0) & 0x7f) == 0b1111 && extract_funct3(addr_at(0)) == 0; -+ } -+}; ++alloc_class chunk1( + -+inline NativeInstruction* nativeInstruction_at(address addr) { -+ return (NativeInstruction*)addr; -+} ++ // no save ++ F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H, + -+// The natural type of an RISCV instruction is uint32_t -+inline NativeInstruction* nativeInstruction_at(uint32_t *addr) { -+ return (NativeInstruction*)addr; -+} ++ // arg registers ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, + -+inline NativeCall* nativeCall_at(address addr); -+// The NativeCall is an abstraction for 
accessing/manipulating native -+// call instructions (used to manipulate inline caches, primitive & -+// DSO calls, etc.). ++ // non-volatiles ++ F8, F8_H, ++ F9, F9_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++); + -+class NativeCall: public NativeInstruction { -+ public: -+ enum RISCV_specific_constants { -+ instruction_size = 4, -+ instruction_offset = 0, -+ displacement_offset = 0, -+ return_address_offset = 4 -+ }; ++alloc_class chunk2(RFLAGS); + -+ address instruction_address() const { return addr_at(instruction_offset); } -+ address next_instruction_address() const { return addr_at(return_address_offset); } -+ address return_address() const { return addr_at(return_address_offset); } -+ address destination() const; ++//----------Architecture Description Register Classes-------------------------- ++// Several register classes are automatically defined based upon information in ++// this architecture description. ++// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) ++// 2) reg_class compiler_method_reg ( /* as def'd in frame section */ ) ++// 2) reg_class interpreter_method_reg ( /* as def'd in frame section */ ) ++// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) ++// + -+ void set_destination(address dest) { -+ assert(is_jal(), "Should be jal instruction!"); -+ intptr_t offset = (intptr_t)(dest - instruction_address()); -+ assert((offset & 0x1) == 0, "bad alignment"); -+ assert(is_imm_in_range(offset, 20, 1), "encoding constraint"); -+ unsigned int insn = 0b1101111; // jal -+ address pInsn = (address)(&insn); -+ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); -+ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); -+ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); -+ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); -+ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra -+ set_int_at(displacement_offset, insn); -+ } ++// Class for all 32 bit general purpose registers ++reg_class all_reg32( ++ R0, ++ R1, ++ R2, ++ R3, ++ R4, ++ R7, ++ R8, ++ R9, ++ R10, ++ R11, ++ R12, ++ R13, ++ R14, ++ R15, ++ R16, ++ R17, ++ R18, ++ R19, ++ R20, ++ R21, ++ R22, ++ R23, ++ R24, ++ R25, ++ R26, ++ R27, ++ R28, ++ R29, ++ R30, ++ R31 ++); + -+ void verify_alignment() {} // do nothing on riscv -+ void verify(); -+ void print(); ++// Class for any 32 bit integer registers (excluding zr) ++reg_class any_reg32 %{ ++ return _ANY_REG32_mask; ++%} + -+ // Creation -+ inline friend NativeCall* nativeCall_at(address addr); -+ inline friend NativeCall* nativeCall_before(address return_address); ++// Singleton class for R10 int register ++reg_class int_r10_reg(R10); + -+ static bool is_call_before(address return_address) { -+ return is_call_at(return_address - NativeCall::return_address_offset); -+ } ++// Singleton class for R12 int register ++reg_class int_r12_reg(R12); + -+ // MT-safe patching of a call instruction. -+ static void insert(address code_pos, address entry); ++// Singleton class for R13 int register ++reg_class int_r13_reg(R13); + -+ static void replace_mt_safe(address instr_addr, address code_buffer); ++// Singleton class for R14 int register ++reg_class int_r14_reg(R14); + -+ // Similar to replace_mt_safe, but just changes the destination. The -+ // important thing is that free-running threads are able to execute -+ // this call instruction at all times. 
If the call is an immediate BL -+ // instruction we can simply rely on atomicity of 32-bit writes to -+ // make sure other threads will see no intermediate states. ++// Class for all long integer registers ++reg_class all_reg( ++ R0, R0_H, ++ R1, R1_H, ++ R2, R2_H, ++ R3, R3_H, ++ R4, R4_H, ++ R7, R7_H, ++ R8, R8_H, ++ R9, R9_H, ++ R10, R10_H, ++ R11, R11_H, ++ R12, R12_H, ++ R13, R13_H, ++ R14, R14_H, ++ R15, R15_H, ++ R16, R16_H, ++ R17, R17_H, ++ R18, R18_H, ++ R19, R19_H, ++ R20, R20_H, ++ R21, R21_H, ++ R22, R22_H, ++ R23, R23_H, ++ R24, R24_H, ++ R25, R25_H, ++ R26, R26_H, ++ R27, R27_H, ++ R28, R28_H, ++ R29, R29_H, ++ R30, R30_H, ++ R31, R31_H ++); + -+ // We cannot rely on locks here, since the free-running threads must run at -+ // full speed. -+ // -+ // Used in the runtime linkage of calls; see class CompiledIC. -+ // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) ++// Class for all long integer registers (excluding zr) ++reg_class any_reg %{ ++ return _ANY_REG_mask; ++%} + -+ // The parameter assert_lock disables the assertion during code generation. -+ void set_destination_mt_safe(address dest, bool assert_lock = true); ++// Class for non-allocatable 32 bit registers ++reg_class non_allocatable_reg32( ++ R0, // zr ++ R1, // ra ++ R2, // sp ++ R3, // gp ++ R4, // tp ++ R23 // java thread ++); + -+ address get_trampoline(); -+}; ++// Class for non-allocatable 64 bit registers ++reg_class non_allocatable_reg( ++ R0, R0_H, // zr ++ R1, R1_H, // ra ++ R2, R2_H, // sp ++ R3, R3_H, // gp ++ R4, R4_H, // tp ++ R23, R23_H // java thread ++); + -+inline NativeCall* nativeCall_at(address addr) { -+ assert_cond(addr != NULL); -+ NativeCall* call = (NativeCall*)(addr - NativeCall::instruction_offset); -+#ifdef ASSERT -+ call->verify(); -+#endif -+ return call; -+} ++reg_class no_special_reg32 %{ ++ return _NO_SPECIAL_REG32_mask; ++%} + -+inline NativeCall* nativeCall_before(address return_address) { -+ assert_cond(return_address != NULL); -+ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); -+#ifdef ASSERT -+ call->verify(); -+#endif -+ return call; -+} ++reg_class no_special_reg %{ ++ return _NO_SPECIAL_REG_mask; ++%} + -+// An interface for accessing/manipulating native mov reg, imm instructions. -+// (used to manipulate inlined 64-bit data calls, etc.) -+class NativeMovConstReg: public NativeInstruction { -+ public: -+ enum RISCV_specific_constants { -+ movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). -+ movptr_with_offset_instruction_size = 5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset(). -+ load_pc_relative_instruction_size = 2 * NativeInstruction::instruction_size, // auipc, ld -+ instruction_offset = 0, -+ displacement_offset = 0 -+ }; ++reg_class ptr_reg %{ ++ return _PTR_REG_mask; ++%} + -+ address instruction_address() const { return addr_at(instruction_offset); } -+ address next_instruction_address() const { -+ // if the instruction at 5 * instruction_size is addi, -+ // it means a lui + addi + slli + addi + slli + addi instruction sequence, -+ // and the next instruction address should be addr_at(6 * instruction_size). 
-+ // However, when the instruction at 5 * instruction_size isn't addi, -+ // the next instruction address should be addr_at(5 * instruction_size) -+ if (nativeInstruction_at(instruction_address())->is_movptr()) { -+ if (is_addi_at(addr_at(movptr_with_offset_instruction_size))) { -+ // Assume: lui, addi, slli, addi, slli, addi -+ return addr_at(movptr_instruction_size); -+ } else { -+ // Assume: lui, addi, slli, addi, slli -+ return addr_at(movptr_with_offset_instruction_size); -+ } -+ } else if (is_load_pc_relative_at(instruction_address())) { -+ // Assume: auipc, ld -+ return addr_at(load_pc_relative_instruction_size); -+ } -+ guarantee(false, "Unknown instruction in NativeMovConstReg"); -+ return NULL; -+ } ++reg_class no_special_ptr_reg %{ ++ return _NO_SPECIAL_PTR_REG_mask; ++%} + -+ intptr_t data() const; -+ void set_data(intptr_t x); ++// Class for 64 bit register r10 ++reg_class r10_reg( ++ R10, R10_H ++); + -+ void flush() { -+ if (!maybe_cpool_ref(instruction_address())) { -+ ICache::invalidate_range(instruction_address(), movptr_instruction_size); -+ } -+ } ++// Class for 64 bit register r11 ++reg_class r11_reg( ++ R11, R11_H ++); + -+ void verify(); -+ void print(); ++// Class for 64 bit register r12 ++reg_class r12_reg( ++ R12, R12_H ++); + -+ // Creation -+ inline friend NativeMovConstReg* nativeMovConstReg_at(address addr); -+ inline friend NativeMovConstReg* nativeMovConstReg_before(address addr); -+}; ++// Class for 64 bit register r13 ++reg_class r13_reg( ++ R13, R13_H ++); + -+inline NativeMovConstReg* nativeMovConstReg_at(address addr) { -+ assert_cond(addr != NULL); -+ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_offset); -+#ifdef ASSERT -+ test->verify(); -+#endif -+ return test; -+} ++// Class for 64 bit register r14 ++reg_class r14_reg( ++ R14, R14_H ++); + -+inline NativeMovConstReg* nativeMovConstReg_before(address addr) { -+ assert_cond(addr != NULL); -+ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); -+#ifdef ASSERT -+ test->verify(); -+#endif -+ return test; -+} ++// Class for 64 bit register r15 ++reg_class r15_reg( ++ R15, R15_H ++); + -+// RISCV should not use C1 runtime patching, so just leave NativeMovRegMem Unimplemented. 
-+class NativeMovRegMem: public NativeInstruction { -+ public: -+ int instruction_start() const { -+ Unimplemented(); -+ return 0; -+ } ++// Class for 64 bit register r16 ++reg_class r16_reg( ++ R16, R16_H ++); + -+ address instruction_address() const { -+ Unimplemented(); -+ return NULL; -+ } ++// Class for method register ++reg_class method_reg( ++ R31, R31_H ++); + -+ int num_bytes_to_end_of_patch() const { -+ Unimplemented(); -+ return 0; -+ } ++// Class for heapbase register ++reg_class heapbase_reg( ++ R27, R27_H ++); + -+ int offset() const; ++// Class for java thread register ++reg_class java_thread_reg( ++ R23, R23_H ++); + -+ void set_offset(int x); ++reg_class r28_reg( ++ R28, R28_H ++); + -+ void add_offset_in_bytes(int add_offset) { Unimplemented(); } ++reg_class r29_reg( ++ R29, R29_H ++); + -+ void verify(); -+ void print(); ++reg_class r30_reg( ++ R30, R30_H ++); + -+ private: -+ inline friend NativeMovRegMem* nativeMovRegMem_at (address addr); -+}; ++// Class for zero registesr ++reg_class zr_reg( ++ R0, R0_H ++); + -+inline NativeMovRegMem* nativeMovRegMem_at (address addr) { -+ Unimplemented(); -+ return NULL; -+} ++// Class for thread register ++reg_class thread_reg( ++ R4, R4_H ++); + -+class NativeJump: public NativeInstruction { -+ public: -+ enum RISCV_specific_constants { -+ instruction_size = NativeInstruction::instruction_size, -+ instruction_offset = 0, -+ data_offset = 0, -+ next_instruction_offset = NativeInstruction::instruction_size -+ }; ++// Class for frame pointer register ++reg_class fp_reg( ++ R8, R8_H ++); + -+ address instruction_address() const { return addr_at(instruction_offset); } -+ address next_instruction_address() const { return addr_at(instruction_size); } -+ address jump_destination() const; -+ void set_jump_destination(address dest); ++// Class for link register ++reg_class ra_reg( ++ R1, R1_H ++); + -+ // Creation -+ inline friend NativeJump* nativeJump_at(address address); ++// Class for long sp register ++reg_class sp_reg( ++ R2, R2_H ++); + -+ void verify(); ++// Class for all float registers ++reg_class float_reg( ++ F0, ++ F1, ++ F2, ++ F3, ++ F4, ++ F5, ++ F6, ++ F7, ++ F8, ++ F9, ++ F10, ++ F11, ++ F12, ++ F13, ++ F14, ++ F15, ++ F16, ++ F17, ++ F18, ++ F19, ++ F20, ++ F21, ++ F22, ++ F23, ++ F24, ++ F25, ++ F26, ++ F27, ++ F28, ++ F29, ++ F30, ++ F31 ++); + -+ // Insertion of native jump instruction -+ static void insert(address code_pos, address entry); -+ // MT-safe insertion of native jump at verified method entry -+ static void check_verified_entry_alignment(address entry, address verified_entry); -+ static void patch_verified_entry(address entry, address verified_entry, address dest); -+}; ++// Double precision float registers have virtual `high halves' that ++// are needed by the allocator. 
++// Class for all double registers ++reg_class double_reg( ++ F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H ++); + -+inline NativeJump* nativeJump_at(address addr) { -+ NativeJump* jump = (NativeJump*)(addr - NativeJump::instruction_offset); -+#ifdef ASSERT -+ jump->verify(); -+#endif -+ return jump; -+} ++// Class for 64 bit register f0 ++reg_class f0_reg( ++ F0, F0_H ++); + -+class NativeGeneralJump: public NativeJump { -+public: -+ enum RISCV_specific_constants { -+ instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr -+ instruction_offset = 0, -+ data_offset = 0, -+ next_instruction_offset = 6 * NativeInstruction::instruction_size // lui, addi, slli, addi, slli, jalr -+ }; ++// Class for 64 bit register f1 ++reg_class f1_reg( ++ F1, F1_H ++); + -+ address jump_destination() const; ++// Class for 64 bit register f2 ++reg_class f2_reg( ++ F2, F2_H ++); + -+ static void insert_unconditional(address code_pos, address entry); -+ static void replace_mt_safe(address instr_addr, address code_buffer); -+}; ++// Class for 64 bit register f3 ++reg_class f3_reg( ++ F3, F3_H ++); + -+inline NativeGeneralJump* nativeGeneralJump_at(address addr) { -+ assert_cond(addr != NULL); -+ NativeGeneralJump* jump = (NativeGeneralJump*)(addr); -+ debug_only(jump->verify();) -+ return jump; -+} ++// class for condition codes ++reg_class reg_flags(RFLAGS); ++%} + -+class NativeIllegalInstruction: public NativeInstruction { -+ public: -+ // Insert illegal opcode as specific address -+ static void insert(address code_pos); -+}; ++//----------DEFINITION BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def ( , ); ++// Generated Code in ad_.hpp ++// #define () ++// // value == ++// Generated code in ad_.cpp adlc_verification() ++// assert( == , "Expect () to equal "); ++// + -+inline bool NativeInstruction::is_nop() { -+ uint32_t insn = *(uint32_t*)addr_at(0); -+ return insn == 0x13; -+} ++// we follow the ppc-aix port in using a simple cost model which ranks ++// register operations as cheap, memory ops as more expensive and ++// branches as most expensive. the first two have a low as well as a ++// normal cost. huge cost appears to be a way of saying don't do ++// something + -+inline bool NativeInstruction::is_jump_or_nop() { -+ return is_nop() || is_jump(); -+} ++definitions %{ ++ // The default cost (of a register move instruction). 
++ int_def DEFAULT_COST ( 100, 100); ++ int_def ALU_COST ( 100, 1 * DEFAULT_COST); // unknown, const, arith, shift, slt, ++ // multi, auipc, nop, logical, move ++ int_def LOAD_COST ( 300, 3 * DEFAULT_COST); // load, fpload ++ int_def STORE_COST ( 100, 1 * DEFAULT_COST); // store, fpstore ++ int_def XFER_COST ( 300, 3 * DEFAULT_COST); // mfc, mtc, fcvt, fmove, fcmp ++ int_def BRANCH_COST ( 200, 2 * DEFAULT_COST); // branch, jmp, call ++ int_def IMUL_COST ( 1000, 10 * DEFAULT_COST); // imul ++ int_def IDIVSI_COST ( 3400, 34 * DEFAULT_COST); // idivdi ++ int_def IDIVDI_COST ( 6600, 66 * DEFAULT_COST); // idivsi ++ int_def FMUL_SINGLE_COST ( 500, 5 * DEFAULT_COST); // fadd, fmul, fmadd ++ int_def FMUL_DOUBLE_COST ( 700, 7 * DEFAULT_COST); // fadd, fmul, fmadd ++ int_def FDIV_COST ( 2000, 20 * DEFAULT_COST); // fdiv ++ int_def FSQRT_COST ( 2500, 25 * DEFAULT_COST); // fsqrt ++ int_def VOLATILE_REF_COST ( 1000, 10 * DEFAULT_COST); ++%} + -+// Call trampoline stubs. -+class NativeCallTrampolineStub : public NativeInstruction { -+ public: + -+ enum RISCV_specific_constants { -+ // Refer to function emit_trampoline_stub. -+ instruction_size = 3 * NativeInstruction::instruction_size + wordSize, // auipc + ld + jr + target address -+ data_offset = 3 * NativeInstruction::instruction_size, // auipc + ld + jr -+ }; + -+ address destination(nmethod *nm = NULL) const; -+ void set_destination(address new_destination); -+ ptrdiff_t destination_offset() const; -+}; ++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description + -+inline bool is_NativeCallTrampolineStub_at(address addr) { -+ // Ensure that the stub is exactly -+ // ld t0, L--->auipc + ld -+ // jr t0 -+ // L: ++source_hpp %{ + -+ // judge inst + register + imm -+ // 1). check the instructions: auipc + ld + jalr -+ // 2). check if auipc[11:7] == t0 and ld[11:7] == t0 and ld[19:15] == t0 && jr[19:15] == t0 -+ // 3). 
check if the offset in ld[31:20] equals the data_offset -+ assert_cond(addr != NULL); -+ const int instr_size = NativeInstruction::instruction_size; -+ if (NativeInstruction::is_auipc_at(addr) && -+ NativeInstruction::is_ld_at(addr + instr_size) && -+ NativeInstruction::is_jalr_at(addr + 2 * instr_size) && -+ (NativeInstruction::extract_rd(addr) == x5) && -+ (NativeInstruction::extract_rd(addr + instr_size) == x5) && -+ (NativeInstruction::extract_rs1(addr + instr_size) == x5) && -+ (NativeInstruction::extract_rs1(addr + 2 * instr_size) == x5) && -+ (Assembler::extract(((unsigned*)addr)[1], 31, 20) == NativeCallTrampolineStub::data_offset)) { -+ return true; -+ } -+ return false; -+} ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "opto/addnode.hpp" ++#include "opto/convertnode.hpp" + -+inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { -+ assert_cond(addr != NULL); -+ assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found"); -+ return (NativeCallTrampolineStub*)addr; -+} ++extern RegMask _ANY_REG32_mask; ++extern RegMask _ANY_REG_mask; ++extern RegMask _PTR_REG_mask; ++extern RegMask _NO_SPECIAL_REG32_mask; ++extern RegMask _NO_SPECIAL_REG_mask; ++extern RegMask _NO_SPECIAL_PTR_REG_mask; + -+class NativeMembar : public NativeInstruction { -+public: -+ uint32_t get_kind(); -+ void set_kind(uint32_t order_kind); -+}; ++class CallStubImpl { + -+inline NativeMembar *NativeMembar_at(address addr) { -+ assert_cond(addr != NULL); -+ assert(nativeInstruction_at(addr)->is_membar(), "no membar found"); -+ return (NativeMembar*)addr; -+} ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- + -+class NativeFenceI : public NativeInstruction { -+public: -+ static inline int instruction_size() { -+ // 2 for fence.i + fence -+ return (UseConservativeFence ? 2 : 1) * NativeInstruction::instruction_size; ++ public: ++ // Size of call trampoline stub. ++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform + } +}; + -+#endif // CPU_RISCV_NATIVEINST_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.cpp b/src/hotspot/cpu/riscv/registerMap_riscv.cpp -new file mode 100644 -index 00000000000..26c1edc36ff ---- /dev/null -+++ b/src/hotspot/cpu/riscv/registerMap_riscv.cpp -@@ -0,0 +1,45 @@ -+/* -+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). 
-+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++class HandlerImpl { + -+#include "precompiled.hpp" -+#include "runtime/registerMap.hpp" -+#include "vmreg_riscv.inline.hpp" ++ public: + -+address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const { -+ if (base_reg->is_VectorRegister()) { -+ assert(base_reg->is_concrete(), "must pass base reg"); -+ int base_reg_enc = (base_reg->value() - ConcreteRegisterImpl::max_fpr) / -+ VectorRegisterImpl::max_slots_per_register; -+ intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size; -+ address base_location = location(base_reg); -+ if (base_location != NULL) { -+ return base_location + offset_in_bytes; -+ } else { -+ return NULL; -+ } -+ } else { -+ return location(base_reg->next(slot_idx)); ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ return MacroAssembler::far_branch_size(); + } -+} -diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp -new file mode 100644 -index 00000000000..f34349811a9 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp -@@ -0,0 +1,43 @@ -+/* -+ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_REGISTERMAP_RISCV_HPP -+#define CPU_RISCV_REGISTERMAP_RISCV_HPP ++ static uint size_deopt_handler() { ++ // count auipc + far branch ++ return NativeInstruction::instruction_size + MacroAssembler::far_branch_size(); ++ } ++}; + -+// machine-dependent implemention for register maps -+ friend class frame; ++bool is_CAS(int opcode, bool maybe_volatile); + -+ private: -+ // This is the hook for finding a register in an "well-known" location, -+ // such as a register block of a predetermined format. 
-+ address pd_location(VMReg reg) const { return NULL; } -+ address pd_location(VMReg base_reg, int slot_idx) const; ++// predicate controlling translation of CompareAndSwapX ++bool needs_acquiring_load_reserved(const Node *load); + -+ // no PD state to clear or copy: -+ void pd_clear() {} -+ void pd_initialize() {} -+ void pd_initialize_from(const RegisterMap* map) {} ++// predicate controlling translation of StoreCM ++bool unnecessary_storestore(const Node *storecm); + -+#endif // CPU_RISCV_REGISTERMAP_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp -new file mode 100644 -index 00000000000..f8116e9df8c ---- /dev/null -+++ b/src/hotspot/cpu/riscv/register_riscv.cpp -@@ -0,0 +1,73 @@ -+/* -+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++// predicate controlling addressing modes ++bool size_fits_all_mem_uses(AddPNode* addp, int shift); ++%} + -+#include "precompiled.hpp" -+#include "register_riscv.hpp" ++source %{ + -+REGISTER_IMPL_DEFINITION(Register, RegisterImpl, RegisterImpl::number_of_registers); -+REGISTER_IMPL_DEFINITION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); -+REGISTER_IMPL_DEFINITION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); ++// Derived RegMask with conditionally allocatable registers + -+const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * -+ RegisterImpl::max_slots_per_register; ++RegMask _ANY_REG32_mask; ++RegMask _ANY_REG_mask; ++RegMask _PTR_REG_mask; ++RegMask _NO_SPECIAL_REG32_mask; ++RegMask _NO_SPECIAL_REG_mask; ++RegMask _NO_SPECIAL_PTR_REG_mask; + -+const int ConcreteRegisterImpl::max_fpr = -+ ConcreteRegisterImpl::max_gpr + -+ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; ++void reg_mask_init() { + -+const int ConcreteRegisterImpl::max_vpr = -+ ConcreteRegisterImpl::max_fpr + -+ VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; ++ _ANY_REG32_mask = _ALL_REG32_mask; ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg())); + ++ _ANY_REG_mask = _ALL_REG_mask; ++ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask); + -+const char* RegisterImpl::name() const { -+ static const char *const names[number_of_registers] = { -+ "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", -+ "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", -+ "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals", -+ "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod" -+ }; -+ return is_valid() ? names[encoding()] : "noreg"; -+} ++ _PTR_REG_mask = _ALL_REG_mask; ++ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask); + -+const char* FloatRegisterImpl::name() const { -+ static const char *const names[number_of_registers] = { -+ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", -+ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", -+ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", -+ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" -+ }; -+ return is_valid() ? names[encoding()] : "noreg"; -+} ++ _NO_SPECIAL_REG32_mask = _ALL_REG32_mask; ++ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask); + -+const char* VectorRegisterImpl::name() const { -+ static const char *const names[number_of_registers] = { -+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", -+ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", -+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", -+ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" -+ }; -+ return is_valid() ? names[encoding()] : "noreg"; -+} -diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp -new file mode 100644 -index 00000000000..a9200cac647 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/register_riscv.hpp -@@ -0,0 +1,324 @@ -+/* -+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ _NO_SPECIAL_REG_mask = _ALL_REG_mask; ++ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); + -+#ifndef CPU_RISCV_REGISTER_RISCV_HPP -+#define CPU_RISCV_REGISTER_RISCV_HPP ++ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask; ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); + -+#include "asm/register.hpp" ++ // x27 is not allocatable when compressed oops is on ++ if (UseCompressedOops) { ++ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg())); ++ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); ++ } + -+#define CSR_FFLAGS 0x001 // Floating-Point Accrued Exceptions. -+#define CSR_FRM 0x002 // Floating-Point Dynamic Rounding Mode. -+#define CSR_FCSR 0x003 // Floating-Point Control and Status Register (frm + fflags). -+#define CSR_VSTART 0x008 // Vector start position -+#define CSR_VXSAT 0x009 // Fixed-Point Saturate Flag -+#define CSR_VXRM 0x00A // Fixed-Point Rounding Mode -+#define CSR_VCSR 0x00F // Vector control and status register -+#define CSR_VL 0xC20 // Vector length -+#define CSR_VTYPE 0xC21 // Vector data type register -+#define CSR_VLENB 0xC22 // VLEN/8 (vector register length in bytes) -+#define CSR_CYCLE 0xc00 // Cycle counter for RDCYCLE instruction. -+#define CSR_TIME 0xc01 // Timer for RDTIME instruction. -+#define CSR_INSTERT 0xc02 // Instructions-retired counter for RDINSTRET instruction. ++ // x8 is not allocatable when PreserveFramePointer is on ++ if (PreserveFramePointer) { ++ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg())); ++ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask); ++ } ++} + -+class VMRegImpl; -+typedef VMRegImpl* VMReg; ++// is_CAS(int opcode, bool maybe_volatile) ++// ++// return true if opcode is one of the possible CompareAndSwapX ++// values otherwise false. 
++bool is_CAS(int opcode, bool maybe_volatile) ++{ ++ switch (opcode) { ++ // We handle these ++ case Op_CompareAndSwapI: ++ case Op_CompareAndSwapL: ++ case Op_CompareAndSwapP: ++ case Op_CompareAndSwapN: ++#if INCLUDE_SHENANDOAHGC ++ case Op_ShenandoahCompareAndSwapP: ++ case Op_ShenandoahCompareAndSwapN: ++#endif ++ case Op_CompareAndSwapB: ++ case Op_CompareAndSwapS: ++ case Op_GetAndSetI: ++ case Op_GetAndSetL: ++ case Op_GetAndSetP: ++ case Op_GetAndSetN: ++ case Op_GetAndAddI: ++ case Op_GetAndAddL: ++ return true; ++ case Op_CompareAndExchangeI: ++ case Op_CompareAndExchangeN: ++ case Op_CompareAndExchangeB: ++ case Op_CompareAndExchangeS: ++ case Op_CompareAndExchangeL: ++ case Op_CompareAndExchangeP: ++ case Op_WeakCompareAndSwapB: ++ case Op_WeakCompareAndSwapS: ++ case Op_WeakCompareAndSwapI: ++ case Op_WeakCompareAndSwapL: ++ case Op_WeakCompareAndSwapP: ++ case Op_WeakCompareAndSwapN: ++ return maybe_volatile; ++ default: ++ return false; ++ } ++} + -+// Use Register as shortcut -+class RegisterImpl; -+typedef const RegisterImpl* Register; ++// predicate controlling translation of CAS ++// ++// returns true if CAS needs to use an acquiring load otherwise false ++bool needs_acquiring_load_reserved(const Node *n) ++{ ++ assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap"); + -+inline constexpr Register as_Register(int encoding); ++ LoadStoreNode* ldst = n->as_LoadStore(); ++ if (n != NULL && is_CAS(n->Opcode(), false)) { ++ assert(ldst != NULL && ldst->trailing_membar() != NULL, "expected trailing membar"); ++ } else { ++ return ldst != NULL && ldst->trailing_membar() != NULL; ++ } ++ // so we can just return true here ++ return true; ++} + -+class RegisterImpl: public AbstractRegisterImpl { -+ static constexpr Register first(); ++// predicate controlling translation of StoreCM ++// ++// returns true if a StoreStore must precede the card write otherwise ++// false + -+ public: -+ enum { -+ number_of_registers = 32, -+ max_slots_per_register = 2, ++bool unnecessary_storestore(const Node *storecm) ++{ ++ assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); + -+ // integer registers x8 - x15 and floating-point registers f8 - f15 are allocatable -+ // for compressed instructions. See Table 17.2 in spec. -+ compressed_register_base = 8, -+ compressed_register_top = 15, -+ }; ++ // we need to generate a dmb ishst between an object put and the ++ // associated card mark when we are using CMS without conditional ++ // card marking + -+ // derived registers, offsets, and addresses -+ const Register successor() const { return this + 1; } ++ if (UseConcMarkSweepGC && !UseCondCardMark) { ++ return false; ++ } + -+ // construction -+ inline friend constexpr Register as_Register(int encoding); ++ // a storestore is unnecesary in all other cases + -+ VMReg as_VMReg() const; ++ return true; ++} + -+ // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } -+ int encoding_nocheck() const { return this - first(); } -+ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } -+ const char* name() const; ++#define __ _masm. 
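
(Editorial aside, not part of the patch.) The predicates above decide when C2 may fold acquire/release ordering into the LR/SC pair of a CompareAndSwap instead of surrounding it with separate fences. A rough sketch of the two shapes for a 32-bit CAS, assuming the address in a0, the expected value in a1 and the new value in a2 — the register choices, labels and exact sequence are illustrative only, not what the port's MacroAssembler actually emits:

    # conservative shape: relaxed LR/SC loop, then an explicit acquire fence
    1:  lr.w     t0, (a0)        # load-reserve the current value
        bne      t0, a1, 2f      # not the expected value -> fail
        sc.w     t1, a2, (a0)    # try to store the new value
        bnez     t1, 1b          # sc.w writes non-zero on failure, so retry
    2:  fence    r, rw           # separate acquire barrier

    # shape enabled when needs_acquiring_load_reserved() holds: the aq/rl bits
    # on lr/sc carry the ordering, so the trailing fence can be dropped
    1:  lr.w.aq  t0, (a0)
        bne      t0, a1, 2f
        sc.w.rl  t1, a2, (a0)
        bnez     t1, 1b
    2:

unnecessary_storestore() plays the analogous role for card-table writes: per the code above, the StoreStore barrier in front of a StoreCM is only kept for CMS without conditional card marking and is elided in every other configuration.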
+ -+ // for rvc -+ int compressed_encoding() const { -+ assert(is_compressed_valid(), "invalid compressed register"); -+ return encoding() - compressed_register_base; -+ } ++// advance declarations for helper functions to convert register ++// indices to register objects + -+ int compressed_encoding_nocheck() const { -+ return encoding_nocheck() - compressed_register_base; -+ } ++// the ad file has to provide implementations of certain methods ++// expected by the generic code ++// ++// REQUIRED FUNCTIONALITY + -+ bool is_compressed_valid() const { -+ return encoding_nocheck() >= compressed_register_base && -+ encoding_nocheck() <= compressed_register_top; -+ } -+}; ++//============================================================================= + -+REGISTER_IMPL_DECLARATION(Register, RegisterImpl, RegisterImpl::number_of_registers); ++// !!!!! Special hack to get all types of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. + -+// The integer registers of the RISCV architecture ++int MachCallStaticJavaNode::ret_addr_offset() ++{ ++ // jal ++ return 1 * NativeInstruction::instruction_size; ++} + -+CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++int MachCallDynamicJavaNode::ret_addr_offset() ++{ ++ return 7 * NativeInstruction::instruction_size; // movptr, jal ++} + -+CONSTANT_REGISTER_DECLARATION(Register, x0, (0)); -+CONSTANT_REGISTER_DECLARATION(Register, x1, (1)); -+CONSTANT_REGISTER_DECLARATION(Register, x2, (2)); -+CONSTANT_REGISTER_DECLARATION(Register, x3, (3)); -+CONSTANT_REGISTER_DECLARATION(Register, x4, (4)); -+CONSTANT_REGISTER_DECLARATION(Register, x5, (5)); -+CONSTANT_REGISTER_DECLARATION(Register, x6, (6)); -+CONSTANT_REGISTER_DECLARATION(Register, x7, (7)); -+CONSTANT_REGISTER_DECLARATION(Register, x8, (8)); -+CONSTANT_REGISTER_DECLARATION(Register, x9, (9)); -+CONSTANT_REGISTER_DECLARATION(Register, x10, (10)); -+CONSTANT_REGISTER_DECLARATION(Register, x11, (11)); -+CONSTANT_REGISTER_DECLARATION(Register, x12, (12)); -+CONSTANT_REGISTER_DECLARATION(Register, x13, (13)); -+CONSTANT_REGISTER_DECLARATION(Register, x14, (14)); -+CONSTANT_REGISTER_DECLARATION(Register, x15, (15)); -+CONSTANT_REGISTER_DECLARATION(Register, x16, (16)); -+CONSTANT_REGISTER_DECLARATION(Register, x17, (17)); -+CONSTANT_REGISTER_DECLARATION(Register, x18, (18)); -+CONSTANT_REGISTER_DECLARATION(Register, x19, (19)); -+CONSTANT_REGISTER_DECLARATION(Register, x20, (20)); -+CONSTANT_REGISTER_DECLARATION(Register, x21, (21)); -+CONSTANT_REGISTER_DECLARATION(Register, x22, (22)); -+CONSTANT_REGISTER_DECLARATION(Register, x23, (23)); -+CONSTANT_REGISTER_DECLARATION(Register, x24, (24)); -+CONSTANT_REGISTER_DECLARATION(Register, x25, (25)); -+CONSTANT_REGISTER_DECLARATION(Register, x26, (26)); -+CONSTANT_REGISTER_DECLARATION(Register, x27, (27)); -+CONSTANT_REGISTER_DECLARATION(Register, x28, (28)); -+CONSTANT_REGISTER_DECLARATION(Register, x29, (29)); -+CONSTANT_REGISTER_DECLARATION(Register, x30, (30)); -+CONSTANT_REGISTER_DECLARATION(Register, x31, (31)); ++int MachCallRuntimeNode::ret_addr_offset() { ++ // for generated stubs the call will be ++ // jal(addr) ++ // or with far branches ++ // jal(trampoline_stub) ++ // for real runtime callouts it will be 11 instructions ++ // see riscv_enc_java_to_runtime ++ // la(t1, retaddr) -> auipc + addi ++ // la(t0, RuntimeAddress(addr)) -> lui + addi + slli + addi + slli + addi ++ // addi(sp, sp, -2 * wordSize) -> addi ++ // sd(t1, Address(sp, wordSize)) -> sd ++ // jalr(t0) -> jalr ++ 
CodeBlob *cb = CodeCache::find_blob(_entry_point); ++ if (cb != NULL) { ++ return 1 * NativeInstruction::instruction_size; ++ } else { ++ return 11 * NativeInstruction::instruction_size; ++ } ++} + -+// Use FloatRegister as shortcut -+class FloatRegisterImpl; -+typedef const FloatRegisterImpl* FloatRegister; ++// ++// Compute padding required for nodes which need alignment ++// + -+inline constexpr FloatRegister as_FloatRegister(int encoding); ++// With RVC a call instruction may get 2-byte aligned. ++// The address of the call instruction needs to be 4-byte aligned to ++// ensure that it does not span a cache line so that it can be patched. ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const ++{ ++ // to make sure the address of jal 4-byte aligned. ++ return align_up(current_offset, alignment_required()) - current_offset; ++} + -+// The implementation of floating point registers for the architecture -+class FloatRegisterImpl: public AbstractRegisterImpl { -+ static constexpr FloatRegister first(); ++// With RVC a call instruction may get 2-byte aligned. ++// The address of the call instruction needs to be 4-byte aligned to ++// ensure that it does not span a cache line so that it can be patched. ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const ++{ ++ // skip the movptr in MacroAssembler::ic_call(): ++ // lui + addi + slli + addi + slli + addi ++ // Though movptr() has already 4-byte aligned with or without RVC, ++ // We need to prevent from further changes by explicitly calculating the size. ++ const int movptr_size = 6 * NativeInstruction::instruction_size; ++ current_offset += movptr_size; ++ // to make sure the address of jal 4-byte aligned. ++ return align_up(current_offset, alignment_required()) - current_offset; ++} + -+ public: -+ enum { -+ number_of_registers = 32, -+ max_slots_per_register = 2, ++// Indicate if the safepoint node needs the polling page as an input + -+ // float registers in the range of [f8~f15] correspond to RVC. Please see Table 16.2 in spec. -+ compressed_register_base = 8, -+ compressed_register_top = 15, -+ }; ++// the shared code plants the oop data at the start of the generated ++// code for the safepoint node and that needs ot be at the load ++// instruction itself. so we cannot plant a mov of the safepoint poll ++// address followed by a load. setting this to true means the mov is ++// scheduled as a prior instruction. that's better for scheduling ++// anyway. 
+ -+ // construction -+ inline friend constexpr FloatRegister as_FloatRegister(int encoding); ++bool SafePointNode::needs_polling_address_input() ++{ ++ return true; ++} + -+ VMReg as_VMReg() const; ++//============================================================================= + -+ // derived registers, offsets, and addresses -+ FloatRegister successor() const { -+ return as_FloatRegister((encoding() + 1) % (unsigned)number_of_registers); -+ } ++#ifndef PRODUCT ++void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(st != NULL); ++ st->print("BREAKPOINT"); ++} ++#endif + -+ // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } -+ int encoding_nocheck() const { return this - first(); } -+ int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } -+ const char* name() const; ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ebreak(); ++} + -+ // for rvc -+ int compressed_encoding() const { -+ assert(is_compressed_valid(), "invalid compressed register"); -+ return encoding() - compressed_register_base; -+ } ++uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} + -+ int compressed_encoding_nocheck() const { -+ return encoding_nocheck() - compressed_register_base; ++//============================================================================= ++ ++#ifndef PRODUCT ++ void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { ++ st->print("nop \t# %d bytes pad for loops and calls", _count); + } ++#endif + -+ bool is_compressed_valid() const { -+ return encoding_nocheck() >= compressed_register_base && -+ encoding_nocheck() <= compressed_register_top; ++ void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. ++ for (int i = 0; i < _count; i++) { ++ __ nop(); ++ } + } -+}; + -+REGISTER_IMPL_DECLARATION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); ++ uint MachNopNode::size(PhaseRegAlloc*) const { ++ return _count * (UseRVC ? 
NativeInstruction::compressed_instruction_size : NativeInstruction::instruction_size); ++ } + -+// The float registers of the RISCV architecture ++//============================================================================= ++const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; + -+CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++int Compile::ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} + -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} + -+// Use VectorRegister as shortcut -+class VectorRegisterImpl; -+typedef const VectorRegisterImpl* VectorRegister; ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ // Empty encoding ++} + -+inline constexpr VectorRegister as_VectorRegister(int encoding); ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ return 0; ++} + -+// The implementation of vector registers for RVV -+class VectorRegisterImpl: public AbstractRegisterImpl { -+ static constexpr VectorRegister first(); ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ assert_cond(st != NULL); ++ st->print("-- \t// MachConstantBaseNode (empty encoding)"); ++} ++#endif + -+ public: -+ enum { -+ number_of_registers = 32, -+ max_slots_per_register = 4 -+ }; ++#ifndef PRODUCT ++void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(st != NULL && ra_ != NULL); ++ 
Compile* C = ra_->C; + -+ // construction -+ inline friend constexpr VectorRegister as_VectorRegister(int encoding); ++ int framesize = C->frame_slots() << LogBytesPerInt; + -+ VMReg as_VMReg() const; ++ if (C->need_stack_bang(framesize)) { ++ st->print("# stack bang size=%d\n\t", framesize); ++ } + -+ // derived registers, offsets, and addresses -+ VectorRegister successor() const { return this + 1; } ++ st->print("sd fp, [sp, #%d]\n\t", - 2 * wordSize); ++ st->print("sd ra, [sp, #%d]\n\t", - wordSize); ++ if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } ++ st->print("sub sp, sp, #%d\n\t", framesize); ++} ++#endif + -+ // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } -+ int encoding_nocheck() const { return this - first(); } -+ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } -+ const char* name() const; ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ assert_cond(ra_ != NULL); ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); + -+}; ++ // n.b. frame size includes space for return pc and fp ++ const int framesize = C->frame_size_in_bytes(); + -+REGISTER_IMPL_DECLARATION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); ++ // insert a nop at the start of the prolog so we can patch in a ++ // branch if we need to invalidate the method later ++ __ nop(); + -+// The vector registers of RVV -+CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1)); ++ assert_cond(C != NULL); + -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v0 , ( 0)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v1 , ( 1)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v2 , ( 2)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v3 , ( 3)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v4 , ( 4)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v5 , ( 5)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v6 , ( 6)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v7 , ( 7)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v8 , ( 8)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v9 , ( 9)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v10 , (10)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v11 , (11)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v12 , (12)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v13 , (13)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v14 , (14)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v15 , (15)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v16 , (16)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v17 , (17)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v18 , (18)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v19 , (19)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v20 , (20)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v21 , (21)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v22 , (22)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v23 , (23)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v24 , (24)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v25 , (25)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v26 , (26)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v27 , (27)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v28 , (28)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v29 , (29)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v30 , (30)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v31 , (31)); ++ int bangsize = 
C->bang_size_in_bytes(); ++ if (C->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } + ++ __ build_frame(framesize); + -+// Need to know the total number of registers of all sorts for SharedInfo. -+// Define a class that exports it. -+class ConcreteRegisterImpl : public AbstractRegisterImpl { -+ public: -+ enum { -+ // A big enough number for C2: all the registers plus flags -+ // This number must be large enough to cover REG_COUNT (defined by c2) registers. -+ // There is no requirement that any ordering here matches any ordering c2 gives -+ // it's optoregs. ++ if (VerifyStackAtCalls) { ++ Unimplemented(); ++ } + -+ number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + -+ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + -+ VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers) -+ }; ++ C->set_frame_complete(cbuf.insts_size()); + -+ // added to make it compile -+ static const int max_gpr; -+ static const int max_fpr; -+ static const int max_vpr; -+}; ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} + -+typedef AbstractRegSet RegSet; -+typedef AbstractRegSet FloatRegSet; -+typedef AbstractRegSet VectorRegSet; ++uint MachPrologNode::size(PhaseRegAlloc* ra_) const ++{ ++ assert_cond(ra_ != NULL); ++ return MachNode::size(ra_); // too many variables; just compute it ++ // the hard way ++} + -+#endif // CPU_RISCV_REGISTER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -new file mode 100644 -index 00000000000..228a64eae2c ---- /dev/null -+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -@@ -0,0 +1,113 @@ -+/* -+ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++int MachPrologNode::reloc() const ++{ ++ return 0; ++} + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "code/relocInfo.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/oop.inline.hpp" -+#include "runtime/safepoint.hpp" ++//============================================================================= + -+void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { -+ if (verify_only) { -+ return; -+ } ++#ifndef PRODUCT ++void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(st != NULL && ra_ != NULL); ++ Compile* C = ra_->C; ++ assert_cond(C != NULL); ++ int framesize = C->frame_size_in_bytes(); + -+ int bytes; ++ st->print("# pop frame %d\n\t", framesize); + -+ switch (type()) { -+ case relocInfo::oop_type: { -+ oop_Relocation *reloc = (oop_Relocation *)this; -+ // in movoop when BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate -+ if (NativeInstruction::is_load_pc_relative_at(addr())) { -+ address constptr = (address)code()->oop_addr_at(reloc->oop_index()); -+ bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); -+ assert(*(address*)constptr == x, "error in oop relocation"); -+ } else { -+ bytes = MacroAssembler::patch_oop(addr(), x); -+ } -+ break; -+ } -+ default: -+ bytes = MacroAssembler::pd_patch_instruction_size(addr(), x); -+ break; ++ if (framesize == 0) { ++ st->print("ld ra, [sp,#%d]\n\t", (2 * wordSize)); ++ st->print("ld fp, [sp,#%d]\n\t", (3 * wordSize)); ++ st->print("add sp, sp, #%d\n\t", (2 * wordSize)); ++ } else { ++ st->print("add sp, sp, #%d\n\t", framesize); ++ st->print("ld ra, [sp,#%d]\n\t", - 2 * wordSize); ++ st->print("ld fp, [sp,#%d]\n\t", - wordSize); + } -+ ICache::invalidate_range(addr(), bytes); -+} + -+address Relocation::pd_call_destination(address orig_addr) { -+ assert(is_call(), "should be an address instruction here"); -+ if (NativeCall::is_call_at(addr())) { -+ address trampoline = nativeCall_at(addr())->get_trampoline(); -+ if (trampoline != NULL) { -+ return nativeCallTrampolineStub_at(trampoline)->destination(); -+ } -+ } -+ if (orig_addr != NULL) { -+ // the extracted address from the instructions in address orig_addr -+ address new_addr = MacroAssembler::pd_call_destination(orig_addr); -+ // If call is branch to self, don't try to relocate it, just leave it -+ // as branch to self. This happens during code generation if the code -+ // buffer expands. It will be relocated to the trampoline above once -+ // code generation is complete. -+ new_addr = (new_addr == orig_addr) ? 
addr() : new_addr; -+ return new_addr; ++ if (do_polling() && C->is_method_compilation()) { ++ st->print("# touch polling page\n\t"); ++ st->print("li t0, #0x%lx\n\t", p2i(os::get_polling_page())); ++ st->print("ld zr, [t0]"); + } -+ return MacroAssembler::pd_call_destination(addr()); +} ++#endif + -+void Relocation::pd_set_call_destination(address x) { -+ assert(is_call(), "should be an address instruction here"); -+ if (NativeCall::is_call_at(addr())) { -+ address trampoline = nativeCall_at(addr())->get_trampoline(); -+ if (trampoline != NULL) { -+ nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false); -+ return; -+ } ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ assert_cond(ra_ != NULL); ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ assert_cond(C != NULL); ++ int framesize = C->frame_size_in_bytes(); ++ ++ __ remove_frame(framesize); ++ ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); + } -+ MacroAssembler::pd_patch_instruction_size(addr(), x); -+ address pd_call = pd_call_destination(addr()); -+ assert(pd_call == x, "fail in reloc"); -+} + -+address* Relocation::pd_address_in_code() { -+ assert(NativeCall::is_load_pc_relative_at(addr()), "Not the expected instruction sequence!"); -+ return (address*)(MacroAssembler::target_addr_for_insn(addr())); ++ if (do_polling() && C->is_method_compilation()) { ++ __ read_polling_page(t0, os::get_polling_page(), relocInfo::poll_return_type); ++ } +} + -+address Relocation::pd_get_address_from_code() { -+ return MacroAssembler::pd_call_destination(addr()); ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ assert_cond(ra_ != NULL); ++ // Variable size. Determine dynamically. ++ return MachNode::size(ra_); +} + -+void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { -+ if (NativeInstruction::maybe_cpool_ref(addr())) { -+ address old_addr = old_addr_for(addr(), src, dest); -+ MacroAssembler::pd_patch_instruction_size(addr(), MacroAssembler::target_addr_for_insn(old_addr)); -+ } ++int MachEpilogNode::reloc() const { ++ // Return number of relocatable values contained in this instruction. ++ return 1; // 1 for polling page. ++} ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); +} + -+void metadata_Relocation::pd_fix_value(address x) { ++// This method seems to be obsolete. It is declared in machnode.hpp ++// and defined in all *.ad files, but it is never called. Should we ++// get rid of it? ++int MachEpilogNode::safepoint_offset() const { ++ assert(do_polling(), "no return for this epilog node"); ++ return 4; +} -diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp -new file mode 100644 -index 00000000000..840ed935d88 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp -@@ -0,0 +1,44 @@ -+/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_RELOCINFO_RISCV_HPP -+#define CPU_RISCV_RELOCINFO_RISCV_HPP ++//============================================================================= + -+ // machine-dependent parts of class relocInfo -+ private: -+ enum { -+ // Relocations are byte-aligned. -+ offset_unit = 1, -+ // Must be at least 1 for RelocInfo::narrow_oop_in_const. -+ format_width = 1 -+ }; ++// Figure out which register class each belongs in: rc_int, rc_float or ++// rc_stack. ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; + -+ public: ++static enum RC rc_class(OptoReg::Name reg) { + -+ // This platform has no oops in the code that are not also -+ // listed in the oop section. -+ static bool mustIterateImmediateOopsInCode() { return false; } ++ if (reg == OptoReg::Bad) { ++ return rc_bad; ++ } + -+#endif // CPU_RISCV_RELOCINFO_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -new file mode 100644 -index 00000000000..588887e1d96 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -0,0 +1,10611 @@ -+// -+// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. 
-+// -+// ++ // we have 30 int registers * 2 halves ++ // (t0 and t1 are omitted) ++ int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2); ++ if (reg < slots_of_int_registers) { ++ return rc_int; ++ } + -+// RISCV Architecture Description File ++ // we have 32 float register * 2 halves ++ int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers; ++ if (reg < slots_of_int_registers + slots_of_float_registers) { ++ return rc_float; ++ } + -+//----------REGISTER DEFINITION BLOCK------------------------------------------ -+// This information is used by the matcher and the register allocator to -+// describe individual registers and classes of registers within the target -+// archtecture. ++ // Between float regs & stack is the flags regs. ++ assert(OptoReg::is_stack(reg), "blow up if spilling flags"); + -+register %{ -+//----------Architecture Description Register Definitions---------------------- -+// General Registers -+// "reg_def" name ( register save type, C convention save type, -+// ideal register type, encoding ); -+// Register Save Types: -+// -+// NS = No-Save: The register allocator assumes that these registers -+// can be used without saving upon entry to the method, & -+// that they do not need to be saved at call sites. -+// -+// SOC = Save-On-Call: The register allocator assumes that these registers -+// can be used without saving upon entry to the method, -+// but that they must be saved at call sites. -+// -+// SOE = Save-On-Entry: The register allocator assumes that these registers -+// must be saved before using them upon entry to the -+// method, but they do not need to be saved at call -+// sites. -+// -+// AS = Always-Save: The register allocator assumes that these registers -+// must be saved before using them upon entry to the -+// method, & that they must be saved at call sites. -+// -+// Ideal Register Type is used to determine how to save & restore a -+// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get -+// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. -+// -+// The encoding number is the actual bit-pattern placed into the opcodes. ++ return rc_stack; ++} + -+// We must define the 64 bit int registers in two 32 bit halves, the -+// real lower register and a virtual upper half register. upper halves -+// are used by the register allocator but are not actually supplied as -+// operands to memory ops. -+// -+// follow the C1 compiler in making registers -+// -+// x7, x9-x17, x27-x31 volatile (caller save) -+// x0-x4, x8, x23 system (no save, no allocate) -+// x5-x6 non-allocatable (so we can use them as temporary regs) ++uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const { ++ assert_cond(ra_ != NULL); ++ Compile* C = ra_->C; + -+// -+// as regards Java usage. we don't use any callee save registers -+// because this makes it difficult to de-optimise a frame (see comment -+// in x86 implementation of Deoptimization::unwind_callee_save_values) -+// ++ // Get registers to move. 
++ OptoReg::Name src_hi = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_lo = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_hi = ra_->get_reg_second(this); ++ OptoReg::Name dst_lo = ra_->get_reg_first(this); + -+// General Registers ++ enum RC src_hi_rc = rc_class(src_hi); ++ enum RC src_lo_rc = rc_class(src_lo); ++ enum RC dst_hi_rc = rc_class(dst_hi); ++ enum RC dst_lo_rc = rc_class(dst_lo); + -+reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr -+reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() ); -+reg_def R1 ( NS, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra -+reg_def R1_H ( NS, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); -+reg_def R2 ( NS, SOE, Op_RegI, 2, x2->as_VMReg() ); // sp -+reg_def R2_H ( NS, SOE, Op_RegI, 2, x2->as_VMReg()->next() ); -+reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp -+reg_def R3_H ( NS, NS, Op_RegI, 3, x3->as_VMReg()->next() ); -+reg_def R4 ( NS, NS, Op_RegI, 4, x4->as_VMReg() ); // tp -+reg_def R4_H ( NS, NS, Op_RegI, 4, x4->as_VMReg()->next() ); -+reg_def R7 ( SOC, SOC, Op_RegI, 7, x7->as_VMReg() ); -+reg_def R7_H ( SOC, SOC, Op_RegI, 7, x7->as_VMReg()->next() ); -+reg_def R8 ( NS, SOE, Op_RegI, 8, x8->as_VMReg() ); // fp -+reg_def R8_H ( NS, SOE, Op_RegI, 8, x8->as_VMReg()->next() ); -+reg_def R9 ( SOC, SOE, Op_RegI, 9, x9->as_VMReg() ); -+reg_def R9_H ( SOC, SOE, Op_RegI, 9, x9->as_VMReg()->next() ); -+reg_def R10 ( SOC, SOC, Op_RegI, 10, x10->as_VMReg() ); -+reg_def R10_H ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()->next()); -+reg_def R11 ( SOC, SOC, Op_RegI, 11, x11->as_VMReg() ); -+reg_def R11_H ( SOC, SOC, Op_RegI, 11, x11->as_VMReg()->next()); -+reg_def R12 ( SOC, SOC, Op_RegI, 12, x12->as_VMReg() ); -+reg_def R12_H ( SOC, SOC, Op_RegI, 12, x12->as_VMReg()->next()); -+reg_def R13 ( SOC, SOC, Op_RegI, 13, x13->as_VMReg() ); -+reg_def R13_H ( SOC, SOC, Op_RegI, 13, x13->as_VMReg()->next()); -+reg_def R14 ( SOC, SOC, Op_RegI, 14, x14->as_VMReg() ); -+reg_def R14_H ( SOC, SOC, Op_RegI, 14, x14->as_VMReg()->next()); -+reg_def R15 ( SOC, SOC, Op_RegI, 15, x15->as_VMReg() ); -+reg_def R15_H ( SOC, SOC, Op_RegI, 15, x15->as_VMReg()->next()); -+reg_def R16 ( SOC, SOC, Op_RegI, 16, x16->as_VMReg() ); -+reg_def R16_H ( SOC, SOC, Op_RegI, 16, x16->as_VMReg()->next()); -+reg_def R17 ( SOC, SOC, Op_RegI, 17, x17->as_VMReg() ); -+reg_def R17_H ( SOC, SOC, Op_RegI, 17, x17->as_VMReg()->next()); -+reg_def R18 ( SOC, SOE, Op_RegI, 18, x18->as_VMReg() ); -+reg_def R18_H ( SOC, SOE, Op_RegI, 18, x18->as_VMReg()->next()); -+reg_def R19 ( SOC, SOE, Op_RegI, 19, x19->as_VMReg() ); -+reg_def R19_H ( SOC, SOE, Op_RegI, 19, x19->as_VMReg()->next()); -+reg_def R20 ( SOC, SOE, Op_RegI, 20, x20->as_VMReg() ); // caller esp -+reg_def R20_H ( SOC, SOE, Op_RegI, 20, x20->as_VMReg()->next()); -+reg_def R21 ( SOC, SOE, Op_RegI, 21, x21->as_VMReg() ); -+reg_def R21_H ( SOC, SOE, Op_RegI, 21, x21->as_VMReg()->next()); -+reg_def R22 ( SOC, SOE, Op_RegI, 22, x22->as_VMReg() ); -+reg_def R22_H ( SOC, SOE, Op_RegI, 22, x22->as_VMReg()->next()); -+reg_def R23 ( NS, SOE, Op_RegI, 23, x23->as_VMReg() ); // java thread -+reg_def R23_H ( NS, SOE, Op_RegI, 23, x23->as_VMReg()->next()); -+reg_def R24 ( SOC, SOE, Op_RegI, 24, x24->as_VMReg() ); -+reg_def R24_H ( SOC, SOE, Op_RegI, 24, x24->as_VMReg()->next()); -+reg_def R25 ( SOC, SOE, Op_RegI, 25, x25->as_VMReg() ); -+reg_def R25_H ( SOC, SOE, Op_RegI, 25, x25->as_VMReg()->next()); -+reg_def R26 ( SOC, SOE, Op_RegI, 26, x26->as_VMReg() ); -+reg_def R26_H ( SOC, SOE, Op_RegI, 26, x26->as_VMReg()->next()); -+reg_def R27 ( SOC, 
SOE, Op_RegI, 27, x27->as_VMReg() ); // heapbase -+reg_def R27_H ( SOC, SOE, Op_RegI, 27, x27->as_VMReg()->next()); -+reg_def R28 ( SOC, SOC, Op_RegI, 28, x28->as_VMReg() ); -+reg_def R28_H ( SOC, SOC, Op_RegI, 28, x28->as_VMReg()->next()); -+reg_def R29 ( SOC, SOC, Op_RegI, 29, x29->as_VMReg() ); -+reg_def R29_H ( SOC, SOC, Op_RegI, 29, x29->as_VMReg()->next()); -+reg_def R30 ( SOC, SOC, Op_RegI, 30, x30->as_VMReg() ); -+reg_def R30_H ( SOC, SOC, Op_RegI, 30, x30->as_VMReg()->next()); -+reg_def R31 ( SOC, SOC, Op_RegI, 31, x31->as_VMReg() ); -+reg_def R31_H ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()->next()); ++ assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register"); + -+// ---------------------------- -+// Float/Double Registers -+// ---------------------------- ++ if (src_hi != OptoReg::Bad) { ++ assert((src_lo & 1) == 0 && src_lo + 1 == src_hi && ++ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi, ++ "expected aligned-adjacent pairs"); ++ } + -+// Double Registers ++ if (src_lo == dst_lo && src_hi == dst_hi) { ++ return 0; // Self copy, no move. ++ } + -+// The rules of ADL require that double registers be defined in pairs. -+// Each pair must be two 32-bit values, but not necessarily a pair of -+// single float registers. In each pair, ADLC-assigned register numbers -+// must be adjacent, with the lower number even. Finally, when the -+// CPU stores such a register pair to memory, the word associated with -+// the lower ADLC-assigned number must be stored to the lower address. ++ bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi && ++ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi; ++ int src_offset = ra_->reg2offset(src_lo); ++ int dst_offset = ra_->reg2offset(dst_lo); + -+// RISCV has 32 floating-point registers. Each can store a single -+// or double precision floating-point value. 
++ if (cbuf != NULL) { ++ MacroAssembler _masm(cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ switch (src_lo_rc) { ++ case rc_int: ++ if (dst_lo_rc == rc_int) { // gpr --> gpr copy ++ if (!is64 && this->ideal_reg() != Op_RegI) { // zero extended for narrow oop or klass ++ __ zero_extend(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]), 32); ++ } else { ++ __ mv(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo])); ++ } ++ } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy ++ if (is64) { ++ __ fmv_d_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_Register(Matcher::_regEncode[src_lo])); ++ } else { ++ __ fmv_w_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_Register(Matcher::_regEncode[src_lo])); ++ } ++ } else { // gpr --> stack spill ++ assert(dst_lo_rc == rc_stack, "spill to bad register class"); ++ __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset); ++ } ++ break; ++ case rc_float: ++ if (dst_lo_rc == rc_int) { // fpr --> gpr copy ++ if (is64) { ++ __ fmv_x_d(as_Register(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } else { ++ __ fmv_x_w(as_Register(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } ++ } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy ++ if (is64) { ++ __ fmv_d(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } else { ++ __ fmv_s(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } ++ } else { // fpr --> stack spill ++ assert(dst_lo_rc == rc_stack, "spill to bad register class"); ++ __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), ++ is64, dst_offset); ++ } ++ break; ++ case rc_stack: ++ if (dst_lo_rc == rc_int) { // stack --> gpr load ++ if (this->ideal_reg() == Op_RegI) { ++ __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); ++ } else { // // zero extended for narrow oop or klass ++ __ unspillu(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); ++ } ++ } else if (dst_lo_rc == rc_float) { // stack --> fpr load ++ __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ is64, src_offset); ++ } else { // stack --> stack copy ++ assert(dst_lo_rc == rc_stack, "spill to bad register class"); ++ if (this->ideal_reg() == Op_RegI) { ++ __ unspill(t0, is64, src_offset); ++ } else { // zero extended for narrow oop or klass ++ __ unspillu(t0, is64, src_offset); ++ } ++ __ spill(t0, is64, dst_offset); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } + -+// for Java use float registers f0-f31 are always save on call whereas -+// the platform ABI treats f8-f9 and f18-f27 as callee save). Other -+// float registers are SOC as per the platform spec ++ if (st != NULL) { ++ st->print("spill "); ++ if (src_lo_rc == rc_stack) { ++ st->print("[sp, #%d] -> ", src_offset); ++ } else { ++ st->print("%s -> ", Matcher::regName[src_lo]); ++ } ++ if (dst_lo_rc == rc_stack) { ++ st->print("[sp, #%d]", dst_offset); ++ } else { ++ st->print("%s", Matcher::regName[dst_lo]); ++ } ++ st->print("\t# spill size = %d", is64 ? 
64 : 32); ++ } + -+reg_def F0 ( SOC, SOC, Op_RegF, 0, f0->as_VMReg() ); -+reg_def F0_H ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()->next() ); -+reg_def F1 ( SOC, SOC, Op_RegF, 1, f1->as_VMReg() ); -+reg_def F1_H ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()->next() ); -+reg_def F2 ( SOC, SOC, Op_RegF, 2, f2->as_VMReg() ); -+reg_def F2_H ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()->next() ); -+reg_def F3 ( SOC, SOC, Op_RegF, 3, f3->as_VMReg() ); -+reg_def F3_H ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()->next() ); -+reg_def F4 ( SOC, SOC, Op_RegF, 4, f4->as_VMReg() ); -+reg_def F4_H ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()->next() ); -+reg_def F5 ( SOC, SOC, Op_RegF, 5, f5->as_VMReg() ); -+reg_def F5_H ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()->next() ); -+reg_def F6 ( SOC, SOC, Op_RegF, 6, f6->as_VMReg() ); -+reg_def F6_H ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()->next() ); -+reg_def F7 ( SOC, SOC, Op_RegF, 7, f7->as_VMReg() ); -+reg_def F7_H ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()->next() ); -+reg_def F8 ( SOC, SOE, Op_RegF, 8, f8->as_VMReg() ); -+reg_def F8_H ( SOC, SOE, Op_RegF, 8, f8->as_VMReg()->next() ); -+reg_def F9 ( SOC, SOE, Op_RegF, 9, f9->as_VMReg() ); -+reg_def F9_H ( SOC, SOE, Op_RegF, 9, f9->as_VMReg()->next() ); -+reg_def F10 ( SOC, SOC, Op_RegF, 10, f10->as_VMReg() ); -+reg_def F10_H ( SOC, SOC, Op_RegF, 10, f10->as_VMReg()->next() ); -+reg_def F11 ( SOC, SOC, Op_RegF, 11, f11->as_VMReg() ); -+reg_def F11_H ( SOC, SOC, Op_RegF, 11, f11->as_VMReg()->next() ); -+reg_def F12 ( SOC, SOC, Op_RegF, 12, f12->as_VMReg() ); -+reg_def F12_H ( SOC, SOC, Op_RegF, 12, f12->as_VMReg()->next() ); -+reg_def F13 ( SOC, SOC, Op_RegF, 13, f13->as_VMReg() ); -+reg_def F13_H ( SOC, SOC, Op_RegF, 13, f13->as_VMReg()->next() ); -+reg_def F14 ( SOC, SOC, Op_RegF, 14, f14->as_VMReg() ); -+reg_def F14_H ( SOC, SOC, Op_RegF, 14, f14->as_VMReg()->next() ); -+reg_def F15 ( SOC, SOC, Op_RegF, 15, f15->as_VMReg() ); -+reg_def F15_H ( SOC, SOC, Op_RegF, 15, f15->as_VMReg()->next() ); -+reg_def F16 ( SOC, SOC, Op_RegF, 16, f16->as_VMReg() ); -+reg_def F16_H ( SOC, SOC, Op_RegF, 16, f16->as_VMReg()->next() ); -+reg_def F17 ( SOC, SOC, Op_RegF, 17, f17->as_VMReg() ); -+reg_def F17_H ( SOC, SOC, Op_RegF, 17, f17->as_VMReg()->next() ); -+reg_def F18 ( SOC, SOE, Op_RegF, 18, f18->as_VMReg() ); -+reg_def F18_H ( SOC, SOE, Op_RegF, 18, f18->as_VMReg()->next() ); -+reg_def F19 ( SOC, SOE, Op_RegF, 19, f19->as_VMReg() ); -+reg_def F19_H ( SOC, SOE, Op_RegF, 19, f19->as_VMReg()->next() ); -+reg_def F20 ( SOC, SOE, Op_RegF, 20, f20->as_VMReg() ); -+reg_def F20_H ( SOC, SOE, Op_RegF, 20, f20->as_VMReg()->next() ); -+reg_def F21 ( SOC, SOE, Op_RegF, 21, f21->as_VMReg() ); -+reg_def F21_H ( SOC, SOE, Op_RegF, 21, f21->as_VMReg()->next() ); -+reg_def F22 ( SOC, SOE, Op_RegF, 22, f22->as_VMReg() ); -+reg_def F22_H ( SOC, SOE, Op_RegF, 22, f22->as_VMReg()->next() ); -+reg_def F23 ( SOC, SOE, Op_RegF, 23, f23->as_VMReg() ); -+reg_def F23_H ( SOC, SOE, Op_RegF, 23, f23->as_VMReg()->next() ); -+reg_def F24 ( SOC, SOE, Op_RegF, 24, f24->as_VMReg() ); -+reg_def F24_H ( SOC, SOE, Op_RegF, 24, f24->as_VMReg()->next() ); -+reg_def F25 ( SOC, SOE, Op_RegF, 25, f25->as_VMReg() ); -+reg_def F25_H ( SOC, SOE, Op_RegF, 25, f25->as_VMReg()->next() ); -+reg_def F26 ( SOC, SOE, Op_RegF, 26, f26->as_VMReg() ); -+reg_def F26_H ( SOC, SOE, Op_RegF, 26, f26->as_VMReg()->next() ); -+reg_def F27 ( SOC, SOE, Op_RegF, 27, f27->as_VMReg() ); -+reg_def F27_H ( SOC, SOE, Op_RegF, 27, f27->as_VMReg()->next() ); -+reg_def F28 ( SOC, SOC, Op_RegF, 28, f28->as_VMReg() ); -+reg_def 
F28_H ( SOC, SOC, Op_RegF, 28, f28->as_VMReg()->next() ); -+reg_def F29 ( SOC, SOC, Op_RegF, 29, f29->as_VMReg() ); -+reg_def F29_H ( SOC, SOC, Op_RegF, 29, f29->as_VMReg()->next() ); -+reg_def F30 ( SOC, SOC, Op_RegF, 30, f30->as_VMReg() ); -+reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() ); -+reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() ); -+reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); ++ return 0; ++} + -+// ---------------------------- -+// Vector Registers -+// ---------------------------- ++#ifndef PRODUCT ++void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ if (ra_ == NULL) { ++ st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx); ++ } else { ++ implementation(NULL, ra_, false, st); ++ } ++} ++#endif + -+// For RVV vector registers, we simply extend vector register size to 4 -+// 'logical' slots. This is nominally 128 bits but it actually covers -+// all possible 'physical' RVV vector register lengths from 128 ~ 1024 -+// bits. The 'physical' RVV vector register length is detected during -+// startup, so the register allocator is able to identify the correct -+// number of bytes needed for an RVV spill/unspill. -+ -+reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() ); -+reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() ); -+reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) ); -+reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) ); -+ -+reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() ); -+reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() ); -+reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) ); -+reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) ); -+ -+reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() ); -+reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() ); -+reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) ); -+reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) ); -+ -+reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() ); -+reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() ); -+reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) ); -+reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) ); -+ -+reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() ); -+reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() ); -+reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) ); -+reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) ); -+ -+reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() ); -+reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() ); -+reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) ); -+reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) ); -+ -+reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() ); -+reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() ); -+reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) ); -+reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) ); -+ -+reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() ); -+reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() ); -+reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) ); -+reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) ); -+ -+reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() ); -+reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() ); -+reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) ); -+reg_def V8_K ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(3) ); -+ -+reg_def V9 ( SOC, SOC, Op_VecA, 9, 
v9->as_VMReg() ); -+reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() ); -+reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) ); -+reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) ); -+ -+reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() ); -+reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() ); -+reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) ); -+reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) ); -+ -+reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() ); -+reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() ); -+reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) ); -+reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) ); -+ -+reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() ); -+reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() ); -+reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) ); -+reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) ); -+ -+reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() ); -+reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() ); -+reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) ); -+reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) ); -+ -+reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() ); -+reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() ); -+reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) ); -+reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) ); -+ -+reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() ); -+reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() ); -+reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) ); -+reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) ); -+ -+reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() ); -+reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() ); -+reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) ); -+reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) ); -+ -+reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() ); -+reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() ); -+reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) ); -+reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) ); -+ -+reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() ); -+reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() ); -+reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) ); -+reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) ); -+ -+reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() ); -+reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() ); -+reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) ); -+reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) ); -+ -+reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() ); -+reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() ); -+reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) ); -+reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) ); -+ -+reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() ); -+reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() ); -+reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) ); -+reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) ); -+ -+reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() ); -+reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() ); 
-+reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) ); -+reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) ); -+ -+reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() ); -+reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() ); -+reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) ); -+reg_def V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) ); -+ -+reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() ); -+reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() ); -+reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) ); -+reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) ); -+ -+reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() ); -+reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() ); -+reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) ); -+reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) ); -+ -+reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() ); -+reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() ); -+reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) ); -+reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) ); -+ -+reg_def V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() ); -+reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() ); -+reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) ); -+reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) ); -+ -+reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() ); -+reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() ); -+reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) ); -+reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) ); -+ -+reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() ); -+reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() ); -+reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) ); -+reg_def V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) ); -+ -+reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() ); -+reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() ); -+reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) ); -+reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) ); -+ -+reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() ); -+reg_def V31_H ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next() ); -+reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) ); -+reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) ); ++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation(&cbuf, ra_, false, NULL); ++} + -+// ---------------------------- -+// Special Registers -+// ---------------------------- ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} + -+// On riscv, the physical flag register is missing, so we use t1 instead, -+// to bridge the RegFlag semantics in share/opto ++//============================================================================= + -+reg_def RFLAGS (SOC, SOC, Op_RegFlags, 6, x6->as_VMReg() ); ++#ifndef PRODUCT ++void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(ra_ != NULL && st != NULL); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("add %s, sp, #%d\t# box lock", ++ Matcher::regName[reg], offset); ++} ++#endif + -+// Specify priority of register selection within phases of register -+// allocation. 
Highest priority is first. A useful heuristic is to -+// give registers a low priority when they are required by machine -+// instructions, like EAX and EDX on I486, and choose no-save registers -+// before save-on-call, & save-on-call before save-on-entry. Registers -+// which participate in fixed calling sequences should come last. -+// Registers which are used as pairs must fall on an even boundary. ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); + -+alloc_class chunk0( -+ // volatiles -+ R7, R7_H, -+ R28, R28_H, -+ R29, R29_H, -+ R30, R30_H, -+ R31, R31_H, ++ assert_cond(ra_ != NULL); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); + -+ // arg registers -+ R10, R10_H, -+ R11, R11_H, -+ R12, R12_H, -+ R13, R13_H, -+ R14, R14_H, -+ R15, R15_H, -+ R16, R16_H, -+ R17, R17_H, ++ if (is_imm_in_range(offset, 12, 0)) { ++ __ addi(as_Register(reg), sp, offset); ++ } else if (is_imm_in_range(offset, 32, 0)) { ++ __ li32(t0, offset); ++ __ add(as_Register(reg), sp, t0); ++ } else { ++ ShouldNotReachHere(); ++ } ++} + -+ // non-volatiles -+ R9, R9_H, -+ R18, R18_H, -+ R19, R19_H, -+ R20, R20_H, -+ R21, R21_H, -+ R22, R22_H, -+ R24, R24_H, -+ R25, R25_H, -+ R26, R26_H, ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_). ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + -+ // non-allocatable registers -+ R23, R23_H, // java thread -+ R27, R27_H, // heapbase -+ R4, R4_H, // thread -+ R8, R8_H, // fp -+ R0, R0_H, // zero -+ R1, R1_H, // ra -+ R2, R2_H, // sp -+ R3, R3_H, // gp -+); ++ if (is_imm_in_range(offset, 12, 0)) { ++ return NativeInstruction::instruction_size; ++ } else { ++ return 3 * NativeInstruction::instruction_size; // lui + addiw + add; ++ } ++} + -+alloc_class chunk1( ++//============================================================================= + -+ // no save -+ F0, F0_H, -+ F1, F1_H, -+ F2, F2_H, -+ F3, F3_H, -+ F4, F4_H, -+ F5, F5_H, -+ F6, F6_H, -+ F7, F7_H, -+ F28, F28_H, -+ F29, F29_H, -+ F30, F30_H, -+ F31, F31_H, ++#ifndef PRODUCT ++void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const ++{ ++ assert_cond(st != NULL); ++ st->print_cr("# MachUEPNode"); ++ if (UseCompressedClassPointers) { ++ st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); ++ if (Universe::narrow_klass_shift() != 0) { ++ st->print_cr("\tdecode_klass_not_null t0, t0"); ++ } ++ } else { ++ st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); ++ } ++ st->print_cr("\tbeq t0, t1, ic_hit"); ++ st->print_cr("\tj, SharedRuntime::_ic_miss_stub\t # Inline cache check"); ++ st->print_cr("\tic_hit:"); ++} ++#endif + -+ // arg registers -+ F10, F10_H, -+ F11, F11_H, -+ F12, F12_H, -+ F13, F13_H, -+ F14, F14_H, -+ F15, F15_H, -+ F16, F16_H, -+ F17, F17_H, ++void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const ++{ ++ // This is the unverified entry point. 
++ MacroAssembler _masm(&cbuf); + -+ // non-volatiles -+ F8, F8_H, -+ F9, F9_H, -+ F18, F18_H, -+ F19, F19_H, -+ F20, F20_H, -+ F21, F21_H, -+ F22, F22_H, -+ F23, F23_H, -+ F24, F24_H, -+ F25, F25_H, -+ F26, F26_H, -+ F27, F27_H, -+); ++ Label skip; ++ __ cmp_klass(j_rarg0, t1, t0, skip); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ __ bind(skip); ++} + -+alloc_class chunk2( -+ V0, V0_H, V0_J, V0_K, -+ V1, V1_H, V1_J, V1_K, -+ V2, V2_H, V2_J, V2_K, -+ V3, V3_H, V3_J, V3_K, -+ V4, V4_H, V4_J, V4_K, -+ V5, V5_H, V5_J, V5_K, -+ V6, V6_H, V6_J, V6_K, -+ V7, V7_H, V7_J, V7_K, -+ V8, V8_H, V8_J, V8_K, -+ V9, V9_H, V9_J, V9_K, -+ V10, V10_H, V10_J, V10_K, -+ V11, V11_H, V11_J, V11_K, -+ V12, V12_H, V12_J, V12_K, -+ V13, V13_H, V13_J, V13_K, -+ V14, V14_H, V14_J, V14_K, -+ V15, V15_H, V15_J, V15_K, -+ V16, V16_H, V16_J, V16_K, -+ V17, V17_H, V17_J, V17_K, -+ V18, V18_H, V18_J, V18_K, -+ V19, V19_H, V19_J, V19_K, -+ V20, V20_H, V20_J, V20_K, -+ V21, V21_H, V21_J, V21_K, -+ V22, V22_H, V22_J, V22_K, -+ V23, V23_H, V23_J, V23_K, -+ V24, V24_H, V24_J, V24_K, -+ V25, V25_H, V25_J, V25_K, -+ V26, V26_H, V26_J, V26_K, -+ V27, V27_H, V27_J, V27_K, -+ V28, V28_H, V28_J, V28_K, -+ V29, V29_H, V29_J, V29_K, -+ V30, V30_H, V30_J, V30_K, -+ V31, V31_H, V31_J, V31_K, -+); ++uint MachUEPNode::size(PhaseRegAlloc* ra_) const ++{ ++ assert_cond(ra_ != NULL); ++ return MachNode::size(ra_); ++} + -+alloc_class chunk3(RFLAGS); ++// REQUIRED EMIT CODE + -+//----------Architecture Description Register Classes-------------------------- -+// Several register classes are automatically defined based upon information in -+// this architecture description. -+// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) -+// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) -+// ++//============================================================================= + -+// Class for all 32 bit general purpose registers -+reg_class all_reg32( -+ R0, -+ R1, -+ R2, -+ R3, -+ R4, -+ R7, -+ R8, -+ R9, -+ R10, -+ R11, -+ R12, -+ R13, -+ R14, -+ R15, -+ R16, -+ R17, -+ R18, -+ R19, -+ R20, -+ R21, -+ R22, -+ R23, -+ R24, -+ R25, -+ R26, -+ R27, -+ R28, -+ R29, -+ R30, -+ R31 -+); ++// Emit exception handler code. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) ++{ ++ // la_patchable t0, #exception_blob_entry_point ++ // jr (offset)t0 ++ // or ++ // j #exception_blob_entry_point ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ int offset = __ offset(); ++ __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} + -+// Class for any 32 bit integer registers (excluding zr) -+reg_class any_reg32 %{ -+ return _ANY_REG32_mask; -+%} ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) ++{ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. 
++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ int offset = __ offset(); + -+// Singleton class for R10 int register -+reg_class int_r10_reg(R10); ++ __ auipc(ra, 0); ++ __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); + -+// Singleton class for R12 int register -+reg_class int_r12_reg(R12); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; + -+// Singleton class for R13 int register -+reg_class int_r13_reg(R13); ++} ++// REQUIRED MATCHER CODE + -+// Singleton class for R14 int register -+reg_class int_r14_reg(R14); ++//============================================================================= + -+// Class for all long integer registers -+reg_class all_reg( -+ R0, R0_H, -+ R1, R1_H, -+ R2, R2_H, -+ R3, R3_H, -+ R4, R4_H, -+ R7, R7_H, -+ R8, R8_H, -+ R9, R9_H, -+ R10, R10_H, -+ R11, R11_H, -+ R12, R12_H, -+ R13, R13_H, -+ R14, R14_H, -+ R15, R15_H, -+ R16, R16_H, -+ R17, R17_H, -+ R18, R18_H, -+ R19, R19_H, -+ R20, R20_H, -+ R21, R21_H, -+ R22, R22_H, -+ R23, R23_H, -+ R24, R24_H, -+ R25, R25_H, -+ R26, R26_H, -+ R27, R27_H, -+ R28, R28_H, -+ R29, R29_H, -+ R30, R30_H, -+ R31, R31_H -+); ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) { ++ return false; ++ } + -+// Class for all long integer registers (excluding zr) -+reg_class any_reg %{ -+ return _ANY_REG_mask; -+%} ++ switch (opcode) { ++ case Op_PopCountI: ++ case Op_PopCountL: ++ return UsePopCountInstruction; + -+// Class for non-allocatable 32 bit registers -+reg_class non_allocatable_reg32( -+ R0, // zr -+ R1, // ra -+ R2, // sp -+ R3, // gp -+ R4, // tp -+ R23 // java thread -+); ++ case Op_CountLeadingZerosI: ++ case Op_CountLeadingZerosL: ++ case Op_CountTrailingZerosI: ++ case Op_CountTrailingZerosL: ++ return UseZbb; ++ } + -+// Class for non-allocatable 64 bit registers -+reg_class non_allocatable_reg( -+ R0, R0_H, // zr -+ R1, R1_H, // ra -+ R2, R2_H, // sp -+ R3, R3_H, // gp -+ R4, R4_H, // tp -+ R23, R23_H // java thread -+); ++ return true; // Per default match rules are supported. ++} + -+reg_class no_special_reg32 %{ -+ return _NO_SPECIAL_REG32_mask; -+%} ++// Identify extra cases that we might want to provide match rules for vector nodes and ++// other intrinsics guarded with vector length (vlen). ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++ return false; ++} + -+reg_class no_special_reg %{ -+ return _NO_SPECIAL_REG_mask; -+%} ++const bool Matcher::has_predicated_vectors(void) { ++ return false; ++} + -+reg_class ptr_reg %{ -+ return _PTR_REG_mask; -+%} ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ return default_pressure_threshold; ++} + -+reg_class no_special_ptr_reg %{ -+ return _NO_SPECIAL_PTR_REG_mask; -+%} ++int Matcher::regnum_to_fpu_offset(int regnum) ++{ ++ Unimplemented(); ++ return 0; ++} + -+// Class for 64 bit register r10 -+reg_class r10_reg( -+ R10, R10_H -+); ++// Is this branch offset short enough that a short branch can be used? ++// ++// NOTE: If the platform does not provide any short branch variants, then ++// this method should return false for offset 0. 
++// |---label(L1)-----| ++// |-----------------| ++// |-----------------|----------eq: float------------------- ++// |-----------------| // far_cmpD_branch | cmpD_branch ++// |------- ---------| feq; | feq; ++// |-far_cmpD_branch-| beqz done; | bnez L; ++// |-----------------| j L; | ++// |-----------------| bind(done); | ++// |-----------------|-------------------------------------- ++// |-----------------| // so shortBrSize = br_size - 4; ++// |-----------------| // so offs = offset - shortBrSize + 4; ++// |---label(L2)-----| ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ // The passed offset is relative to address of the branch. ++ int shortBrSize = br_size - 4; ++ int offs = offset - shortBrSize + 4; ++ return (-4096 <= offs && offs < 4096); ++} + -+// Class for 64 bit register r11 -+reg_class r11_reg( -+ R11, R11_H -+); ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ // Probably always true, even if a temp register is required. ++ return true; ++} + -+// Class for 64 bit register r12 -+reg_class r12_reg( -+ R12, R12_H -+); ++// true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} + -+// Class for 64 bit register r13 -+reg_class r13_reg( -+ R13, R13_H -+); ++// Vector width in bytes. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ return 0; ++} + -+// Class for 64 bit register r14 -+reg_class r14_reg( -+ R14, R14_H -+); ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ return vector_width_in_bytes(bt) / type2aelembytes(bt); ++} ++const int Matcher::min_vector_size(const BasicType bt) { ++ return max_vector_size(bt); ++} + -+// Class for 64 bit register r15 -+reg_class r15_reg( -+ R15, R15_H -+); ++// Vector ideal reg. ++const uint Matcher::vector_ideal_reg(int len) { ++ ShouldNotReachHere(); ++ return 0; ++} + -+// Class for 64 bit register r16 -+reg_class r16_reg( -+ R16, R16_H -+); ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ fatal("vector shift is not supported"); ++ return Node::NotAMachineReg; ++} + -+// Class for method register -+reg_class method_reg( -+ R31, R31_H -+); ++// AES support not yet implemented ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} + -+// Class for heapbase register -+reg_class heapbase_reg( -+ R27, R27_H -+); ++// RISC-V supports misaligned vectors store/load. ++const bool Matcher::misaligned_vectors_ok() { ++ return true; ++} + -+// Class for java thread register -+reg_class java_thread_reg( -+ R23, R23_H -+); ++// false => size gets scaled to BytesPerLong, ok. ++const bool Matcher::init_array_count_is_in_bytes = false; + -+reg_class r28_reg( -+ R28, R28_H -+); ++// Use conditional move (CMOVL) ++const int Matcher::long_cmove_cost() { ++ // long cmoves are no more expensive than int cmoves ++ return 0; ++} + -+reg_class r29_reg( -+ R29, R29_H -+); ++const int Matcher::float_cmove_cost() { ++ // float cmoves are no more expensive than int cmoves ++ return 0; ++} + -+reg_class r30_reg( -+ R30, R30_H -+); ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; + -+// Class for zero registesr -+reg_class zr_reg( -+ R0, R0_H -+); ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? 
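Editor's note, not part of the patch text: Matcher::is_short_branch_offset above re-biases the label offset before the range check, following the diagram in its comment (shortBrSize = br_size - 4, then offs = offset - shortBrSize + 4), and then tests the roughly +-4 KiB reach of a RISC-V conditional branch. A minimal worked example of that arithmetic, assuming the three 4-byte instructions (feq; beqz; j) shown for the far float-compare sequence, i.e. br_size = 12:

    #include <cstdio>

    // Mirrors the offset adjustment in Matcher::is_short_branch_offset above.
    // 'offset' is relative to the address of the (far) branch sequence.
    bool fits_short_branch(int br_size, int offset) {
      int shortBrSize = br_size - 4;
      int offs = offset - shortBrSize + 4;
      return -4096 <= offs && offs < 4096;   // conditional-branch reach
    }

    int main() {
      // Label assumed to be 2000 bytes ahead of the branch:
      // offs = 2000 - 8 + 4 = 1996, well inside the +-4 KiB window.
      std::printf("short form usable: %d\n", fits_short_branch(12, 2000));
      return 0;
    }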
++const bool Matcher::need_masked_shift_count = false; + -+// Class for thread register -+reg_class thread_reg( -+ R4, R4_H -+); ++// This affects two different things: ++// - how Decode nodes are matched ++// - how ImplicitNullCheck opportunities are recognized ++// If true, the matcher will try to remove all Decodes and match them ++// (as operands) into nodes. NullChecks are not prepared to deal with ++// Decodes by final_graph_reshaping(). ++// If false, final_graph_reshaping() forces the decode behind the Cmp ++// for a NullCheck. The matcher matches the Decode node into a register. ++// Implicit_null_check optimization moves the Decode along with the ++// memory operation back up before the NullCheck. ++bool Matcher::narrow_oop_use_complex_address() { ++ return Universe::narrow_oop_shift() == 0; ++} + -+// Class for frame pointer register -+reg_class fp_reg( -+ R8, R8_H -+); ++bool Matcher::narrow_klass_use_complex_address() { ++// TODO ++// decide whether we need to set this to true ++ return false; ++} + -+// Class for link register -+reg_class ra_reg( -+ R1, R1_H -+); ++bool Matcher::const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. ++ return Universe::narrow_oop_base() == NULL; ++} + -+// Class for long sp register -+reg_class sp_reg( -+ R2, R2_H -+); ++bool Matcher::const_klass_prefer_decode() { ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ return Universe::narrow_klass_base() == NULL; ++} + -+// Class for all float registers -+reg_class float_reg( -+ F0, -+ F1, -+ F2, -+ F3, -+ F4, -+ F5, -+ F6, -+ F7, -+ F8, -+ F9, -+ F10, -+ F11, -+ F12, -+ F13, -+ F14, -+ F15, -+ F16, -+ F17, -+ F18, -+ F19, -+ F20, -+ F21, -+ F22, -+ F23, -+ F24, -+ F25, -+ F26, -+ F27, -+ F28, -+ F29, -+ F30, -+ F31 -+); ++// Is it better to copy float constants, or load them directly from ++// memory? Intel can load a float constant from a direct address, ++// requiring no extra registers. Most RISCs will have to materialize ++// an address into a register first, so they would do better to copy ++// the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; + -+// Double precision float registers have virtual `high halves' that -+// are needed by the allocator. -+// Class for all double registers -+reg_class double_reg( -+ F0, F0_H, -+ F1, F1_H, -+ F2, F2_H, -+ F3, F3_H, -+ F4, F4_H, -+ F5, F5_H, -+ F6, F6_H, -+ F7, F7_H, -+ F8, F8_H, -+ F9, F9_H, -+ F10, F10_H, -+ F11, F11_H, -+ F12, F12_H, -+ F13, F13_H, -+ F14, F14_H, -+ F15, F15_H, -+ F16, F16_H, -+ F17, F17_H, -+ F18, F18_H, -+ F19, F19_H, -+ F20, F20_H, -+ F21, F21_H, -+ F22, F22_H, -+ F23, F23_H, -+ F24, F24_H, -+ F25, F25_H, -+ F26, F26_H, -+ F27, F27_H, -+ F28, F28_H, -+ F29, F29_H, -+ F30, F30_H, -+ F31, F31_H -+); ++// If CPU can load and store mis-aligned doubles directly then no ++// fixup is needed. Else we split the double into 2 integer pieces ++// and move it piece-by-piece. Only happens when passing doubles into ++// C code as the Java calling convention forces doubles to be aligned. 
++const bool Matcher::misaligned_doubles_ok = true; + -+// Class for all RVV vector registers -+reg_class vectora_reg( -+ V1, V1_H, V1_J, V1_K, -+ V2, V2_H, V2_J, V2_K, -+ V3, V3_H, V3_J, V3_K, -+ V4, V4_H, V4_J, V4_K, -+ V5, V5_H, V5_J, V5_K, -+ V6, V6_H, V6_J, V6_K, -+ V7, V7_H, V7_J, V7_K, -+ V8, V8_H, V8_J, V8_K, -+ V9, V9_H, V9_J, V9_K, -+ V10, V10_H, V10_J, V10_K, -+ V11, V11_H, V11_J, V11_K, -+ V12, V12_H, V12_J, V12_K, -+ V13, V13_H, V13_J, V13_K, -+ V14, V14_H, V14_J, V14_K, -+ V15, V15_H, V15_J, V15_K, -+ V16, V16_H, V16_J, V16_K, -+ V17, V17_H, V17_J, V17_K, -+ V18, V18_H, V18_J, V18_K, -+ V19, V19_H, V19_J, V19_K, -+ V20, V20_H, V20_J, V20_K, -+ V21, V21_H, V21_J, V21_K, -+ V22, V22_H, V22_J, V22_K, -+ V23, V23_H, V23_J, V23_K, -+ V24, V24_H, V24_J, V24_K, -+ V25, V25_H, V25_J, V25_K, -+ V26, V26_H, V26_J, V26_K, -+ V27, V27_H, V27_J, V27_K, -+ V28, V28_H, V28_J, V28_K, -+ V29, V29_H, V29_J, V29_K, -+ V30, V30_H, V30_J, V30_K, -+ V31, V31_H, V31_J, V31_K -+); ++// No-op on amd64 ++void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { ++ Unimplemented(); ++} + -+// Class for 64 bit register f0 -+reg_class f0_reg( -+ F0, F0_H -+); ++// Advertise here if the CPU requires explicit rounding operations to ++// implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; + -+// Class for 64 bit register f1 -+reg_class f1_reg( -+ F1, F1_H -+); ++// Are floats converted to double when stored to stack during ++// deoptimization? ++bool Matcher::float_in_double() { return false; } + -+// Class for 64 bit register f2 -+reg_class f2_reg( -+ F2, F2_H -+); ++// Do ints take an entire long register or just half? ++// The relevant question is how the int is callee-saved: ++// the whole long is written but de-opt'ing will have to extract ++// the relevant 32 bits. ++const bool Matcher::int_in_long = true; + -+// Class for 64 bit register f3 -+reg_class f3_reg( -+ F3, F3_H -+); ++// Return whether or not this register is ever used as an argument. ++// This function is used on startup to build the trampoline stubs in ++// generateOptoStub. Registers not mentioned will be killed by the VM ++// call in the trampoline, and arguments in those registers not be ++// available to the callee. 
++bool Matcher::can_be_java_arg(int reg) ++{ ++ return ++ reg == R10_num || reg == R10_H_num || ++ reg == R11_num || reg == R11_H_num || ++ reg == R12_num || reg == R12_H_num || ++ reg == R13_num || reg == R13_H_num || ++ reg == R14_num || reg == R14_H_num || ++ reg == R15_num || reg == R15_H_num || ++ reg == R16_num || reg == R16_H_num || ++ reg == R17_num || reg == R17_H_num || ++ reg == F10_num || reg == F10_H_num || ++ reg == F11_num || reg == F11_H_num || ++ reg == F12_num || reg == F12_H_num || ++ reg == F13_num || reg == F13_H_num || ++ reg == F14_num || reg == F14_H_num || ++ reg == F15_num || reg == F15_H_num || ++ reg == F16_num || reg == F16_H_num || ++ reg == F17_num || reg == F17_H_num; ++} + -+// class for vector register v1 -+reg_class v1_reg( -+ V1, V1_H, V1_J, V1_K -+); ++bool Matcher::is_spillable_arg(int reg) ++{ ++ return can_be_java_arg(reg); ++} + -+// class for vector register v2 -+reg_class v2_reg( -+ V2, V2_H, V2_J, V2_K -+); ++bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { ++ return false; ++} + -+// class for vector register v3 -+reg_class v3_reg( -+ V3, V3_H, V3_J, V3_K -+); ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} + -+// class for vector register v4 -+reg_class v4_reg( -+ V4, V4_H, V4_J, V4_K -+); ++// Register for MODI projection of divmodI. ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} + -+// class for vector register v5 -+reg_class v5_reg( -+ V5, V5_H, V5_J, V5_K -+); ++// Register for DIVL projection of divmodL. ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} + -+// class for condition codes -+reg_class reg_flags(RFLAGS); -+%} ++// Register for MODL projection of divmodL. ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} + -+//----------DEFINITION BLOCK--------------------------------------------------- -+// Define name --> value mappings to inform the ADLC of an integer valued name -+// Current support includes integer values in the range [0, 0x7FFFFFFF] -+// Format: -+// int_def ( , ); -+// Generated Code in ad_.hpp -+// #define () -+// // value == -+// Generated code in ad_.cpp adlc_verification() -+// assert( == , "Expect () to equal "); -+// ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} + -+// we follow the ppc-aix port in using a simple cost model which ranks -+// register operations as cheap, memory ops as more expensive and -+// branches as most expensive. the first two have a low as well as a -+// normal cost. huge cost appears to be a way of saying don't do -+// something ++bool size_fits_all_mem_uses(AddPNode* addp, int shift) { ++ assert_cond(addp != NULL); ++ for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) { ++ Node* u = addp->fast_out(i); ++ if (u != NULL && u->is_Mem()) { ++ int opsize = u->as_Mem()->memory_size(); ++ assert(opsize > 0, "unexpected memory operand size"); ++ if (u->as_Mem()->memory_size() != (1 << shift)) { ++ return false; ++ } ++ } ++ } ++ return true; ++} + -+definitions %{ -+ // The default cost (of a register move instruction). 
-+ int_def DEFAULT_COST ( 100, 100); -+ int_def ALU_COST ( 100, 1 * DEFAULT_COST); // unknown, const, arith, shift, slt, -+ // multi, auipc, nop, logical, move -+ int_def LOAD_COST ( 300, 3 * DEFAULT_COST); // load, fpload -+ int_def STORE_COST ( 100, 1 * DEFAULT_COST); // store, fpstore -+ int_def XFER_COST ( 300, 3 * DEFAULT_COST); // mfc, mtc, fcvt, fmove, fcmp -+ int_def BRANCH_COST ( 100, 1 * DEFAULT_COST); // branch, jmp, call -+ int_def IMUL_COST ( 1000, 10 * DEFAULT_COST); // imul -+ int_def IDIVSI_COST ( 3400, 34 * DEFAULT_COST); // idivdi -+ int_def IDIVDI_COST ( 6600, 66 * DEFAULT_COST); // idivsi -+ int_def FMUL_SINGLE_COST ( 500, 5 * DEFAULT_COST); // fadd, fmul, fmadd -+ int_def FMUL_DOUBLE_COST ( 700, 7 * DEFAULT_COST); // fadd, fmul, fmadd -+ int_def FDIV_COST ( 2000, 20 * DEFAULT_COST); // fdiv -+ int_def FSQRT_COST ( 2500, 25 * DEFAULT_COST); // fsqrt -+ int_def VOLATILE_REF_COST ( 1000, 10 * DEFAULT_COST); -+%} ++const bool Matcher::convi2l_type_required = false; + ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? ++bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} + ++void Compile::reshape_address(AddPNode* addp) { ++} + -+//----------SOURCE BLOCK------------------------------------------------------- -+// This is a block of C++ code which provides values, functions, and -+// definitions necessary in the rest of the architecture description ++%} + -+source_hpp %{ + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/cardTable.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" -+#include "gc/shared/collectedHeap.hpp" -+#include "opto/addnode.hpp" -+#include "opto/convertnode.hpp" + -+extern RegMask _ANY_REG32_mask; -+extern RegMask _ANY_REG_mask; -+extern RegMask _PTR_REG_mask; -+extern RegMask _NO_SPECIAL_REG32_mask; -+extern RegMask _NO_SPECIAL_REG_mask; -+extern RegMask _NO_SPECIAL_PTR_REG_mask; ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to ++// output byte streams. Encoding classes are parameterized macros ++// used by Machine Instruction Nodes in order to generate the bit ++// encoding of the instruction. Operands specify their base encoding ++// interface with the interface keyword. There are currently ++// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & ++// COND_INTER. REG_INTER causes an operand to generate a function ++// which returns its register number when queried. CONST_INTER causes ++// an operand to generate a function which returns the value of the ++// constant when queried. MEMORY_INTER causes an operand to generate ++// four functions which return the Base Register, the Index Register, ++// the Scale Value, and the Offset Value of the operand when queried. ++// COND_INTER causes an operand to generate six functions which return ++// the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional ++// instruction. ++// ++// Instructions specify two basic values for encoding. Again, a ++// function is available to check if the constant displacement is an ++// oop. 
They use the ins_encode keyword to specify their encoding ++// classes (which must be a sequence of enc_class names, and their ++// parameters, specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular ++// instruction needs for encoding need to be specified. ++encode %{ ++ // BEGIN Non-volatile memory access + -+class CallStubImpl { ++ enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ int64_t con = (int64_t)$src$$constant; ++ Register dst_reg = as_Register($dst$$reg); ++ __ li(dst_reg, con); ++ %} + -+ //-------------------------------------------------------------- -+ //---< Used for optimization in Compile::shorten_branches >--- -+ //-------------------------------------------------------------- ++ enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ address con = (address)$src$$constant; ++ if (con == NULL || con == (address)1) { ++ ShouldNotReachHere(); ++ } else { ++ relocInfo::relocType rtype = $src->constant_reloc(); ++ if (rtype == relocInfo::oop_type) { ++ __ movoop(dst_reg, (jobject)con, /*immediate*/true); ++ } else if (rtype == relocInfo::metadata_type) { ++ __ mov_metadata(dst_reg, (Metadata*)con); ++ } else { ++ assert(rtype == relocInfo::none, "unexpected reloc type"); ++ __ li(dst_reg, $src$$constant); ++ } ++ } ++ %} + -+ public: -+ // Size of call trampoline stub. -+ static uint size_call_trampoline() { -+ return 0; // no call trampolines on this platform -+ } ++ enc_class riscv_enc_mov_p1(iRegP dst) %{ ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ Register dst_reg = as_Register($dst$$reg); ++ __ li(dst_reg, 1); ++ %} + -+ // number of relocations needed by a call trampoline stub -+ static uint reloc_call_trampoline() { -+ return 0; // no call trampolines on this platform -+ } -+}; ++ enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{ ++ MacroAssembler _masm(&cbuf); ++ int32_t offset = 0; ++ address page = (address)$src$$constant; ++ unsigned long align = (unsigned long)page & 0xfff; ++ assert(align == 0, "polling page must be page aligned"); ++ Register dst_reg = as_Register($dst$$reg); ++ __ la_patchable(dst_reg, Address(page, relocInfo::poll_type), offset); ++ __ addi(dst_reg, dst_reg, offset); ++ %} + -+class HandlerImpl { ++ enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ ++ MacroAssembler _masm(&cbuf); ++ __ load_byte_map_base($dst$$Register); ++ %} + -+ public: ++ enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ address con = (address)$src$$constant; ++ if (con == NULL) { ++ ShouldNotReachHere(); ++ } else { ++ relocInfo::relocType rtype = $src->constant_reloc(); ++ assert(rtype == relocInfo::oop_type, "unexpected reloc type"); ++ __ set_narrow_oop(dst_reg, (jobject)con); ++ } ++ %} + -+ static int emit_exception_handler(CodeBuffer &cbuf); -+ static int emit_deopt_handler(CodeBuffer& cbuf); ++ enc_class riscv_enc_mov_zero(iRegNorP dst) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ __ mv(dst_reg, zr); ++ %} + -+ static uint size_exception_handler() { -+ return MacroAssembler::far_branch_size(); -+ } ++ enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ ++ MacroAssembler _masm(&cbuf); ++ Register 
dst_reg = as_Register($dst$$reg); ++ address con = (address)$src$$constant; ++ if (con == NULL) { ++ ShouldNotReachHere(); ++ } else { ++ relocInfo::relocType rtype = $src->constant_reloc(); ++ assert(rtype == relocInfo::metadata_type, "unexpected reloc type"); ++ __ set_narrow_klass(dst_reg, (Klass *)con); ++ } ++ %} + -+ static uint size_deopt_handler() { -+ // count auipc + far branch -+ return NativeInstruction::instruction_size + MacroAssembler::far_branch_size(); -+ } -+}; ++ enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ ++ MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + -+class Node::PD { -+public: -+ enum NodeFlags { -+ _last_flag = Node::_last_flag -+ }; -+}; ++ enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ ++ MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + -+bool is_CAS(int opcode, bool maybe_volatile); ++ enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{ ++ MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + -+// predicate controlling translation of CompareAndSwapX -+bool needs_acquiring_load_reserved(const Node *load); ++ enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ ++ MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + -+// predicate controlling addressing modes -+bool size_fits_all_mem_uses(AddPNode* addp, int shift); -+%} ++ enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ ++ MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + -+source %{ ++ enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{ ++ MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + -+// Derived RegMask with conditionally allocatable registers ++ // compare and branch instruction encodings + -+RegMask _ANY_REG32_mask; -+RegMask _ANY_REG_mask; -+RegMask _PTR_REG_mask; -+RegMask _NO_SPECIAL_REG32_mask; -+RegMask _NO_SPECIAL_REG_mask; -+RegMask _NO_SPECIAL_PTR_REG_mask; ++ enc_class riscv_enc_j(label lbl) %{ ++ MacroAssembler _masm(&cbuf); ++ Label* L = $lbl$$label; ++ __ j(*L); ++ %} + -+void reg_mask_init() { ++ enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ ++ MacroAssembler _masm(&cbuf); ++ Label* L = $lbl$$label; ++ switch ($cmp$$cmpcode) { ++ case(BoolTest::ge): ++ __ j(*L); ++ break; ++ case(BoolTest::lt): ++ break; ++ default: ++ 
Unimplemented(); ++ } ++ %} + -+ _ANY_REG32_mask = _ALL_REG32_mask; -+ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg())); ++ // call instruction encodings + -+ _ANY_REG_mask = _ALL_REG_mask; -+ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask); ++ enc_class riscv_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) %{ ++ Register sub_reg = as_Register($sub$$reg); ++ Register super_reg = as_Register($super$$reg); ++ Register temp_reg = as_Register($temp$$reg); ++ Register result_reg = as_Register($result$$reg); ++ Register cr_reg = t1; + -+ _PTR_REG_mask = _ALL_REG_mask; -+ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask); ++ Label miss; ++ Label done; ++ MacroAssembler _masm(&cbuf); ++ __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, ++ NULL, &miss); ++ if ($primary) { ++ __ mv(result_reg, zr); ++ } else { ++ __ mv(cr_reg, zr); ++ __ j(done); ++ } + -+ _NO_SPECIAL_REG32_mask = _ALL_REG32_mask; -+ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask); ++ __ bind(miss); ++ if (!$primary) { ++ __ li(cr_reg, 1); ++ } + -+ _NO_SPECIAL_REG_mask = _ALL_REG_mask; -+ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); ++ __ bind(done); ++ %} + -+ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask; -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); ++ enc_class riscv_enc_java_static_call(method meth) %{ ++ MacroAssembler _masm(&cbuf); + -+ // x27 is not allocatable when compressed oops is on -+ if (UseCompressedOops) { -+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg())); -+ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); -+ } ++ address addr = (address)$meth$$method; ++ address call = NULL; ++ assert_cond(addr != NULL); ++ if (!_method) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(Address(addr, rspec), &cbuf); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } + -+ // x8 is not allocatable when PreserveFramePointer is on -+ if (PreserveFramePointer) { -+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg())); -+ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask); -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask); -+ } -+} ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ %} + -+void PhaseOutput::pd_perform_mach_node_analysis() { -+} ++ enc_class riscv_enc_java_dynamic_call(method meth) %{ ++ MacroAssembler _masm(&cbuf); ++ int method_index = resolved_method_index(cbuf); ++ address call = __ ic_call((address)$meth$$method, method_index); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} + -+int MachNode::pd_alignment_required() const { -+ return 1; -+} ++ enc_class riscv_enc_call_epilog() %{ ++ MacroAssembler _masm(&cbuf); ++ if (VerifyStackAtCalls) { ++ // Check that stack depth is unchanged: find majik cookie on stack ++ __ call_Unimplemented(); ++ } ++ %} + -+int MachNode::compute_padding(int current_offset) const { -+ return 0; -+} ++ enc_class riscv_enc_java_to_runtime(method meth) %{ ++ MacroAssembler _masm(&cbuf); + -+// is_CAS(int opcode, bool maybe_volatile) -+// -+// return true if opcode is one of the possible CompareAndSwapX -+// values otherwise false. -+bool is_CAS(int opcode, bool maybe_volatile) -+{ -+ switch (opcode) { -+ // We handle these -+ case Op_CompareAndSwapI: -+ case Op_CompareAndSwapL: -+ case Op_CompareAndSwapP: -+ case Op_CompareAndSwapN: -+ case Op_ShenandoahCompareAndSwapP: -+ case Op_ShenandoahCompareAndSwapN: -+ case Op_CompareAndSwapB: -+ case Op_CompareAndSwapS: -+ case Op_GetAndSetI: -+ case Op_GetAndSetL: -+ case Op_GetAndSetP: -+ case Op_GetAndSetN: -+ case Op_GetAndAddI: -+ case Op_GetAndAddL: -+ return true; -+ case Op_CompareAndExchangeI: -+ case Op_CompareAndExchangeN: -+ case Op_CompareAndExchangeB: -+ case Op_CompareAndExchangeS: -+ case Op_CompareAndExchangeL: -+ case Op_CompareAndExchangeP: -+ case Op_WeakCompareAndSwapB: -+ case Op_WeakCompareAndSwapS: -+ case Op_WeakCompareAndSwapI: -+ case Op_WeakCompareAndSwapL: -+ case Op_WeakCompareAndSwapP: -+ case Op_WeakCompareAndSwapN: -+ case Op_ShenandoahWeakCompareAndSwapP: -+ case Op_ShenandoahWeakCompareAndSwapN: -+ case Op_ShenandoahCompareAndExchangeP: -+ case Op_ShenandoahCompareAndExchangeN: -+ return maybe_volatile; -+ default: -+ return false; -+ } -+} ++ // some calls to generated routines (arraycopy code) are scheduled ++ // by C2 as runtime calls. if so we can call them using a jr (they ++ // will be in a reachable segment) otherwise we have to use a jalr ++ // which loads the absolute address into a register. 
++ address entry = (address)$meth$$method; ++ CodeBlob *cb = CodeCache::find_blob(entry); ++ if (cb != NULL) { ++ address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type)); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } else { ++ Label retaddr; ++ __ la(t1, retaddr); ++ __ la(t0, RuntimeAddress(entry)); ++ // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc() ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(t1, Address(sp, wordSize)); ++ __ jalr(t0); ++ __ bind(retaddr); ++ __ addi(sp, sp, 2 * wordSize); ++ } ++ %} + -+// predicate controlling translation of CAS -+// -+// returns true if CAS needs to use an acquiring load otherwise false -+bool needs_acquiring_load_reserved(const Node *n) -+{ -+ assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap"); ++ // using the cr register as the bool result: 0 for success; others failed. ++ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register flag = t1; ++ Register oop = as_Register($object$$reg); ++ Register box = as_Register($box$$reg); ++ Register disp_hdr = as_Register($tmp1$$reg); ++ Register tmp = as_Register($tmp2$$reg); ++ Label cont; ++ Label object_has_monitor; + -+ LoadStoreNode* ldst = n->as_LoadStore(); -+ if (n != NULL && is_CAS(n->Opcode(), false)) { -+ assert(ldst != NULL && ldst->trailing_membar() != NULL, "expected trailing membar"); -+ } else { -+ return ldst != NULL && ldst->trailing_membar() != NULL; -+ } -+ // so we can just return true here -+ return true; -+} -+#define __ _masm. ++ assert_different_registers(oop, box, tmp, disp_hdr, t0); + -+// advance declarations for helper functions to convert register -+// indices to register objects ++ // Load markWord from object into displaced_header. ++ __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + -+// the ad file has to provide implementations of certain methods -+// expected by the generic code -+// -+// REQUIRED FUNCTIONALITY ++ // Always do locking in runtime. ++ if (EmitSync & 0x01) { ++ __ mv(flag, 1); ++ return; ++ } + -+//============================================================================= ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag); ++ } + -+// !!!!! Special hack to get all types of calls to specify the byte offset -+// from the start of the call to the point where the return address -+// will point. ++ // Check for existing monitor ++ if ((EmitSync & 0x02) == 0) { ++ __ andi(t0, disp_hdr, markOopDesc::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ } + -+int MachCallStaticJavaNode::ret_addr_offset() -+{ -+ // jal -+ return 1 * NativeInstruction::instruction_size; -+} ++ // Set tmp to be (markWord of object | UNLOCK_VALUE). ++ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); + -+int MachCallDynamicJavaNode::ret_addr_offset() -+{ -+ return 7 * NativeInstruction::instruction_size; // movptr, jal -+} ++ // Initialize the box. (Must happen before we update the object mark!) 
++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + -+int MachCallRuntimeNode::ret_addr_offset() { -+ // for generated stubs the call will be -+ // jal(addr) -+ // or with far branches -+ // jal(trampoline_stub) -+ // for real runtime callouts it will be 11 instructions -+ // see riscv_enc_java_to_runtime -+ // la(t1, retaddr) -> auipc + addi -+ // la(t0, RuntimeAddress(addr)) -> lui + addi + slli + addi + slli + addi -+ // addi(sp, sp, -2 * wordSize) -> addi -+ // sd(t1, Address(sp, wordSize)) -> sd -+ // jalr(t0) -> jalr -+ CodeBlob *cb = CodeCache::find_blob(_entry_point); -+ if (cb != NULL) { -+ return 1 * NativeInstruction::instruction_size; -+ } else { -+ return 11 * NativeInstruction::instruction_size; -+ } -+} ++ // Compare object markWord with an unlocked value (tmp) and if ++ // equal exchange the stack address of our box with object markWord. ++ // On failure disp_hdr contains the possibly locked markWord. ++ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/disp_hdr); ++ __ mv(flag, zr); ++ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas + -+int MachCallNativeNode::ret_addr_offset() { -+ Unimplemented(); -+ return -1; -+} ++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + -+// -+// Compute padding required for nodes which need alignment -+// ++ // If the compare-and-exchange succeeded, then we found an unlocked ++ // object, will have now locked it will continue at label cont ++ // We did not see an unlocked object so try the fast recursive case. + -+// With RVC a call instruction may get 2-byte aligned. -+// The address of the call instruction needs to be 4-byte aligned to -+// ensure that it does not span a cache line so that it can be patched. -+int CallStaticJavaDirectNode::compute_padding(int current_offset) const -+{ -+ // to make sure the address of jal 4-byte aligned. -+ return align_up(current_offset, alignment_required()) - current_offset; -+} ++ // Check if the owner is self by comparing the value in the ++ // markWord of object (disp_hdr) with the stack pointer. ++ __ sub(disp_hdr, disp_hdr, sp); ++ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); ++ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, ++ // hence we can store 0 as the displaced header in the box, which indicates that it is a ++ // recursive lock. ++ __ andr(tmp/*==0?*/, disp_hdr, tmp); ++ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ __ mv(flag, tmp); // we can use the value of tmp as the result here + -+// With RVC a call instruction may get 2-byte aligned. -+// The address of the call instruction needs to be 4-byte aligned to -+// ensure that it does not span a cache line so that it can be patched. -+int CallDynamicJavaDirectNode::compute_padding(int current_offset) const -+{ -+ // skip the movptr in MacroAssembler::ic_call(): -+ // lui + addi + slli + addi + slli + addi -+ // Though movptr() has already 4-byte aligned with or without RVC, -+ // We need to prevent from further changes by explicitly calculating the size. -+ const int movptr_size = 6 * NativeInstruction::instruction_size; -+ current_offset += movptr_size; -+ // to make sure the address of jal 4-byte aligned. 
-+ return align_up(current_offset, alignment_required()) - current_offset; -+} ++ if ((EmitSync & 0x02) == 0) { ++ __ j(cont); + -+//============================================================================= ++ // Handle existing monitor. ++ __ bind(object_has_monitor); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ // ++ // Try to CAS m->owner from NULL to current thread. ++ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); ++ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) + -+#ifndef PRODUCT -+void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ assert_cond(st != NULL); -+ st->print("BREAKPOINT"); -+} -+#endif ++ // Store a non-null value into the box to avoid looking like a re-entrant ++ // lock. The fast-path monitor unlock code checks for ++ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the ++ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. ++ __ mv(tmp, (address)markOopDesc::unused_mark()); ++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ } + -+void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ C2_MacroAssembler _masm(&cbuf); -+ Assembler::CompressibleRegion cr(&_masm); -+ __ ebreak(); -+} ++ __ bind(cont); ++ %} + -+uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { -+ return MachNode::size(ra_); -+} ++ // using cr flag to indicate the fast_unlock result: 0 for success; others failed. ++ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register flag = t1; ++ Register oop = as_Register($object$$reg); ++ Register box = as_Register($box$$reg); ++ Register disp_hdr = as_Register($tmp1$$reg); ++ Register tmp = as_Register($tmp2$$reg); ++ Label cont; ++ Label object_has_monitor; + -+//============================================================================= ++ assert_different_registers(oop, box, tmp, disp_hdr, flag); + -+#ifndef PRODUCT -+ void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { -+ st->print("nop \t# %d bytes pad for loops and calls", _count); -+ } -+#endif ++ // Always do locking in runtime. ++ if (EmitSync & 0x01) { ++ __ mv(flag, 1); ++ return; ++ } + -+ void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { -+ C2_MacroAssembler _masm(&cbuf); -+ Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. -+ for (int i = 0; i < _count; i++) { -+ __ nop(); ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ __ biased_locking_exit(oop, tmp, cont, flag); + } -+ } + -+ uint MachNopNode::size(PhaseRegAlloc*) const { -+ return _count * (UseRVC ? NativeInstruction::compressed_instruction_size : NativeInstruction::instruction_size); -+ } ++ // Find the lock address and load the displaced header from the stack. ++ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + -+//============================================================================= -+const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; ++ // If the displaced header is 0, we have a recursive unlock. 
++ __ mv(flag, disp_hdr); ++ __ beqz(disp_hdr, cont); + -+int ConstantTable::calculate_table_base_offset() const { -+ return 0; // absolute addressing, no offset -+} ++ // Handle existing monitor. ++ if ((EmitSync & 0x02) == 0) { ++ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); ++ __ andi(t0, tmp, markOopDesc::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ } + -+bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } -+void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { -+ ShouldNotReachHere(); -+} ++ // Check if it is still a light weight lock, this is true if we ++ // see the stack address of the basicLock in the markWord of the ++ // object. + -+void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { -+ // Empty encoding -+} ++ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, ++ Assembler::rl, /*result*/tmp); ++ __ xorr(flag, box, tmp); // box == tmp if cas succeeds ++ __ j(cont); + -+uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { -+ return 0; -+} ++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + -+#ifndef PRODUCT -+void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { -+ assert_cond(st != NULL); -+ st->print("-- \t// MachConstantBaseNode (empty encoding)"); -+} -+#endif ++ // Handle existing monitor. ++ if ((EmitSync & 0x02) == 0) { ++ __ bind(object_has_monitor); ++ STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); ++ __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor ++ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); ++ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. ++ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions ++ __ bnez(flag, cont); + -+#ifndef PRODUCT -+void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ assert_cond(st != NULL && ra_ != NULL); -+ Compile* C = ra_->C; ++ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); ++ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. 
++ __ bnez(flag, cont); ++ // need a release store here ++ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sd(zr, Address(tmp)); // set unowned ++ } + -+ int framesize = C->output()->frame_slots() << LogBytesPerInt; ++ __ bind(cont); ++ %} + -+ if (C->output()->need_stack_bang(framesize)) { -+ st->print("# stack bang size=%d\n\t", framesize); -+ } ++ // arithmetic encodings + -+ st->print("sd fp, [sp, #%d]\n\t", - 2 * wordSize); -+ st->print("sd ra, [sp, #%d]\n\t", - wordSize); -+ if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } -+ st->print("sub sp, sp, #%d\n\t", framesize); ++ enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivl(dst_reg, src1_reg, src2_reg, false); ++ %} + -+ if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) { -+ st->print("ld t0, [guard]\n\t"); -+ st->print("membar LoadLoad\n\t"); -+ st->print("ld t1, [xthread, #thread_disarmed_offset]\n\t"); -+ st->print("beq t0, t1, skip\n\t"); -+ st->print("jalr #nmethod_entry_barrier_stub\n\t"); -+ st->print("j skip\n\t"); -+ st->print("guard: int\n\t"); -+ st->print("skip:\n\t"); -+ } -+} -+#endif ++ enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivq(dst_reg, src1_reg, src2_reg, false); ++ %} + -+void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ assert_cond(ra_ != NULL); -+ Compile* C = ra_->C; -+ C2_MacroAssembler _masm(&cbuf); ++ enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivl(dst_reg, src1_reg, src2_reg, true); ++ %} + -+ // n.b. 
frame size includes space for return pc and fp -+ const int framesize = C->output()->frame_size_in_bytes(); ++ enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivq(dst_reg, src1_reg, src2_reg, true); ++ %} + -+ // insert a nop at the start of the prolog so we can patch in a -+ // branch if we need to invalidate the method later -+ __ nop(); ++ enc_class riscv_enc_tail_call(iRegP jump_target) %{ ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ Register target_reg = as_Register($jump_target$$reg); ++ __ jr(target_reg); ++ %} + -+ assert_cond(C != NULL); ++ enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ Register target_reg = as_Register($jump_target$$reg); ++ // exception oop should be in x10 ++ // ret addr has been popped into ra ++ // callee expects it in x13 ++ __ mv(x13, ra); ++ __ jr(target_reg); ++ %} + -+ if (C->clinit_barrier_on_entry()) { -+ assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); ++ enc_class riscv_enc_rethrow() %{ ++ MacroAssembler _masm(&cbuf); ++ __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); ++ %} + -+ Label L_skip_barrier; ++ enc_class riscv_enc_ret() %{ ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ret(); ++ %} + -+ __ mov_metadata(t1, C->method()->holder()->constant_encoding()); -+ __ clinit_barrier(t1, t0, &L_skip_barrier); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -+ __ bind(L_skip_barrier); -+ } ++%} + -+ int bangsize = C->output()->bang_size_in_bytes(); -+ if (C->output()->need_stack_bang(bangsize)) { -+ __ generate_stack_overflow_check(bangsize); -+ } ++//----------FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add OptoReg::stack0()) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | | | 3 ++// | | +--------+ ++// V | | old out| Empty on Intel, window on Sparc ++// | old |preserve| Must be even aligned. ++// | SP-+--------+----> Matcher::_old_SP, even aligned ++// | | in | 3 area for Intel ret address ++// Owned by |preserve| Empty on Sparc. ++// SELF +--------+ ++// | | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> OptoReg::stack0(), even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by +--------+ ++// CALLEE | new out| 6 Empty on Intel, window on Sparc ++// | new |preserve| Must be even-aligned. ++// | SP-+--------+----> Matcher::_new_SP, even aligned ++// | | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. 
++// Region 6-7 is determined per call site.
++// Note 2: If the calling convention leaves holes in the incoming argument
++// area, those holes are owned by SELF. Holes in the outgoing area
++// are owned by the CALLEE. Holes should not be necessary in the
++// incoming area, as the Java calling convention is completely under
++// the control of the AD file. Doubles can be sorted and packed to
++// avoid holes. Holes in the outgoing arguments may be necessary for
++// varargs C calling conventions.
++// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
++// even aligned with pad0 as needed.
++// Region 6 is even aligned. Region 6-7 is NOT even aligned;
++// (the latter is true on Intel but is it false on RISCV?)
++// region 6-11 is even aligned; it may be padded out more so that
++// the region from SP to FP meets the minimum stack alignment.
++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
++// alignment. Region 11, pad1, may be dynamically extended so that
++// SP meets the minimum alignment.
+
-+ __ build_frame(framesize);
++frame %{
++ // What direction does stack grow in (assumed to be same for C & Java)
++ stack_direction(TOWARDS_LOW);
+
-+ if (C->stub_function() == NULL) {
-+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
-+ bs->nmethod_entry_barrier(&_masm);
-+ }
++ // These three registers define part of the calling convention
++ // between compiled code and the interpreter.
+
-+ if (VerifyStackAtCalls) {
-+ Unimplemented();
-+ }
++ // Inline Cache Register or methodOop for I2C.
++ inline_cache_reg(R31);
+
-+ C->output()->set_frame_complete(cbuf.insts_size());
++ // Method Oop Register when calling interpreter.
++ interpreter_method_oop_reg(R31);
+
-+ if (C->has_mach_constant_base_node()) {
-+ // NOTE: We set the table base offset here because users might be
-+ // emitted before MachConstantBaseNode.
-+ ConstantTable& constant_table = C->output()->constant_table();
-+ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
-+ }
-+}
++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
++ cisc_spilling_operand_name(indOffset);
+
-+uint MachPrologNode::size(PhaseRegAlloc* ra_) const
-+{
-+ assert_cond(ra_ != NULL);
-+ return MachNode::size(ra_); // too many variables; just compute it
-+ // the hard way
-+}
++ // Number of stack slots consumed by locking an object
++ // generate Compile::sync_stack_slots
++ // VMRegImpl::slots_per_word = wordSize / stack_slot_size = 8 / 4 = 2
++ sync_stack_slots(1 * VMRegImpl::slots_per_word);
+
-+int MachPrologNode::reloc() const
-+{
-+ return 0;
-+}
++ // Compiled code's Frame Pointer
++ frame_pointer(R2);
+
-+//=============================================================================
++ // Interpreter stores its frame pointer in a register which is
++ // stored to the stack by I2CAdaptors.
++ // I2CAdaptors convert from interpreted java to compiled java.
++ interpreter_frame_pointer(R8);
+
-+#ifndef PRODUCT
-+void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
-+ assert_cond(st != NULL && ra_ != NULL);
-+ Compile* C = ra_->C;
-+ assert_cond(C != NULL);
-+ int framesize = C->output()->frame_size_in_bytes();
++ // Stack alignment requirement
++ stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
+
-+ st->print("# pop frame %d\n\t", framesize);
++ // Number of stack slots between incoming argument block and the start of
++ // a new frame.
The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. RISC-V needs two slots for ++ // return address and fp. ++ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); + -+ if (framesize == 0) { -+ st->print("ld ra, [sp,#%d]\n\t", (2 * wordSize)); -+ st->print("ld fp, [sp,#%d]\n\t", (3 * wordSize)); -+ st->print("add sp, sp, #%d\n\t", (2 * wordSize)); -+ } else { -+ st->print("add sp, sp, #%d\n\t", framesize); -+ st->print("ld ra, [sp,#%d]\n\t", - 2 * wordSize); -+ st->print("ld fp, [sp,#%d]\n\t", - wordSize); -+ } ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); + -+ if (do_polling() && C->is_method_compilation()) { -+ st->print("# test polling word\n\t"); -+ st->print("ld t0, [xthread,#%d]\n\t", in_bytes(JavaThread::polling_word_offset())); -+ st->print("bgtu sp, t0, #slow_path"); -+ } -+} -+#endif ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. ++ // Otherwise, it is above the locks and verification slot and alignment word ++ // TODO this may well be correct but need to check why that - 2 is there ++ // ppc port uses 0 but we definitely need to allow for fixed_slots ++ // which folds in the space used for monitors ++ return_addr(STACK - 2 + ++ align_up((Compile::current()->in_preserve_stack_slots() + ++ Compile::current()->fixed_slots()), ++ stack_alignment_in_slots())); + -+void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ assert_cond(ra_ != NULL); -+ Compile* C = ra_->C; -+ C2_MacroAssembler _masm(&cbuf); -+ assert_cond(C != NULL); -+ int framesize = C->output()->frame_size_in_bytes(); ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. + -+ __ remove_frame(framesize); ++ calling_convention ++ %{ ++ // No difference between ingoing/outgoing just pass false ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} + -+ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { -+ __ reserved_stack_check(); -+ } ++ c_calling_convention ++ %{ ++ // This is obviously always outgoing ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length); ++ %} + -+ if (do_polling() && C->is_method_compilation()) { -+ Label dummy_label; -+ Label* code_stub = &dummy_label; -+ if (!C->output()->in_scratch_emit_size()) { -+ code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); -+ } -+ __ relocate(relocInfo::poll_return_type); -+ __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */); -+ } -+} ++ // Location of compiled Java return values. Same as C for now. 
++ return_value ++ %{ ++ assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, ++ "only return normal values"); + -+uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { -+ assert_cond(ra_ != NULL); -+ // Variable size. Determine dynamically. -+ return MachNode::size(ra_); -+} ++ static const int lo[Op_RegL + 1] = { // enum name ++ 0, // Op_Node ++ 0, // Op_Set ++ R10_num, // Op_RegN ++ R10_num, // Op_RegI ++ R10_num, // Op_RegP ++ F10_num, // Op_RegF ++ F10_num, // Op_RegD ++ R10_num // Op_RegL ++ }; + -+int MachEpilogNode::reloc() const { -+ // Return number of relocatable values contained in this instruction. -+ return 1; // 1 for polling page. -+} -+const Pipeline * MachEpilogNode::pipeline() const { -+ return MachNode::pipeline_class(); -+} ++ static const int hi[Op_RegL + 1] = { // enum name ++ 0, // Op_Node ++ 0, // Op_Set ++ OptoReg::Bad, // Op_RegN ++ OptoReg::Bad, // Op_RegI ++ R10_H_num, // Op_RegP ++ OptoReg::Bad, // Op_RegF ++ F10_H_num, // Op_RegD ++ R10_H_num // Op_RegL ++ }; + -+//============================================================================= ++ return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); ++ %} ++%} + -+// Figure out which register class each belongs in: rc_int, rc_float or -+// rc_stack. -+enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack }; ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(1); // Required cost attribute + -+static enum RC rc_class(OptoReg::Name reg) { ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_short_branch(0); // Required flag: is this instruction ++ // a non-matching short branch variant ++ // of some long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must ++ // be a power of 2) specifies the ++ // alignment that some part of the ++ // instruction (not necessarily the ++ // start) requires. If > 1, a ++ // compute_padding() function must be ++ // provided for the instruction + -+ if (reg == OptoReg::Bad) { -+ return rc_bad; -+ } ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. 
+ -+ // we have 30 int registers * 2 halves -+ // (t0 and t1 are omitted) -+ int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2); -+ if (reg < slots_of_int_registers) { -+ return rc_int; -+ } ++//----------Simple Operands---------------------------------------------------- + -+ // we have 32 float register * 2 halves -+ int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers; -+ if (reg < slots_of_int_registers + slots_of_float_registers) { -+ return rc_float; -+ } ++// Integer operands 32 bit ++// 32 bit immediate ++operand immI() ++%{ ++ match(ConI); + -+ // we have 32 vector register * 4 halves -+ int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers; -+ if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) { -+ return rc_vector; -+ } ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ // Between vector regs & stack is the flags regs. -+ assert(OptoReg::is_stack(reg), "blow up if spilling flags"); ++// 32 bit zero ++operand immI0() ++%{ ++ predicate(n->get_int() == 0); ++ match(ConI); + -+ return rc_stack; -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const { -+ assert_cond(ra_ != NULL); -+ Compile* C = ra_->C; ++// 32 bit unit increment ++operand immI_1() ++%{ ++ predicate(n->get_int() == 1); ++ match(ConI); + -+ // Get registers to move. -+ OptoReg::Name src_hi = ra_->get_reg_second(in(1)); -+ OptoReg::Name src_lo = ra_->get_reg_first(in(1)); -+ OptoReg::Name dst_hi = ra_->get_reg_second(this); -+ OptoReg::Name dst_lo = ra_->get_reg_first(this); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ enum RC src_hi_rc = rc_class(src_hi); -+ enum RC src_lo_rc = rc_class(src_lo); -+ enum RC dst_hi_rc = rc_class(dst_hi); -+ enum RC dst_lo_rc = rc_class(dst_lo); ++// 32 bit unit decrement ++operand immI_M1() ++%{ ++ predicate(n->get_int() == -1); ++ match(ConI); + -+ assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register"); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ if (src_hi != OptoReg::Bad) { -+ assert((src_lo & 1) == 0 && src_lo + 1 == src_hi && -+ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi, -+ "expected aligned-adjacent pairs"); -+ } ++// Unsigned Integer Immediate: 6-bit int, greater than 32 ++operand uimmI6_ge32() %{ ++ predicate(((unsigned int)(n->get_int()) < 64) && (n->get_int() >= 32)); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ if (src_lo == dst_lo && src_hi == dst_hi) { -+ return 0; // Self copy, no move. 
-+ } ++operand immI_le_4() ++%{ ++ predicate(n->get_int() <= 4); ++ match(ConI); + -+ bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi && -+ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi; -+ int src_offset = ra_->reg2offset(src_lo); -+ int dst_offset = ra_->reg2offset(dst_lo); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ if (bottom_type()->isa_vect() != NULL) { -+ uint ireg = ideal_reg(); -+ if (ireg == Op_VecA && cbuf) { -+ C2_MacroAssembler _masm(cbuf); -+ Assembler::CompressibleRegion cr(&_masm); -+ int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); -+ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { -+ // stack to stack -+ __ spill_copy_vector_stack_to_stack(src_offset, dst_offset, -+ vector_reg_size_in_bytes); -+ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { -+ // vpr to stack -+ __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo)); -+ } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { -+ // stack to vpr -+ __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); -+ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { -+ // vpr to vpr -+ __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+ } else if (cbuf != NULL) { -+ C2_MacroAssembler _masm(cbuf); -+ Assembler::CompressibleRegion cr(&_masm); -+ switch (src_lo_rc) { -+ case rc_int: -+ if (dst_lo_rc == rc_int) { // gpr --> gpr copy -+ if (!is64 && this->ideal_reg() != Op_RegI) { // zero extended for narrow oop or klass -+ __ zero_extend(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]), 32); -+ } else { -+ __ mv(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo])); -+ } -+ } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy -+ if (is64) { -+ __ fmv_d_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ as_Register(Matcher::_regEncode[src_lo])); -+ } else { -+ __ fmv_w_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ as_Register(Matcher::_regEncode[src_lo])); -+ } -+ } else { // gpr --> stack spill -+ assert(dst_lo_rc == rc_stack, "spill to bad register class"); -+ __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset); -+ } -+ break; -+ case rc_float: -+ if (dst_lo_rc == rc_int) { // fpr --> gpr copy -+ if (is64) { -+ __ fmv_x_d(as_Register(Matcher::_regEncode[dst_lo]), -+ as_FloatRegister(Matcher::_regEncode[src_lo])); -+ } else { -+ __ fmv_x_w(as_Register(Matcher::_regEncode[dst_lo]), -+ as_FloatRegister(Matcher::_regEncode[src_lo])); -+ } -+ } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy -+ if (is64) { -+ __ fmv_d(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ as_FloatRegister(Matcher::_regEncode[src_lo])); -+ } else { -+ __ fmv_s(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ as_FloatRegister(Matcher::_regEncode[src_lo])); -+ } -+ } else { // fpr --> stack spill -+ assert(dst_lo_rc == rc_stack, "spill to bad register class"); -+ __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), -+ is64, dst_offset); -+ } -+ break; -+ case rc_stack: -+ if (dst_lo_rc == rc_int) { // stack --> gpr load -+ if (this->ideal_reg() == Op_RegI) { -+ __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); -+ } else { // // zero extended for narrow oop or klass -+ __ unspillu(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); -+ } -+ } else if (dst_lo_rc == rc_float) { // stack --> 
fpr load -+ __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ is64, src_offset); -+ } else { // stack --> stack copy -+ assert(dst_lo_rc == rc_stack, "spill to bad register class"); -+ if (this->ideal_reg() == Op_RegI) { -+ __ unspill(t0, is64, src_offset); -+ } else { // zero extended for narrow oop or klass -+ __ unspillu(t0, is64, src_offset); -+ } -+ __ spill(t0, is64, dst_offset); -+ } -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ } ++operand immI_16() ++%{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ if (st != NULL) { -+ st->print("spill "); -+ if (src_lo_rc == rc_stack) { -+ st->print("[sp, #%d] -> ", src_offset); -+ } else { -+ st->print("%s -> ", Matcher::regName[src_lo]); -+ } -+ if (dst_lo_rc == rc_stack) { -+ st->print("[sp, #%d]", dst_offset); -+ } else { -+ st->print("%s", Matcher::regName[dst_lo]); -+ } -+ if (bottom_type()->isa_vect() != NULL) { -+ int vsize = 0; -+ if (ideal_reg() == Op_VecA) { -+ vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; -+ } else { -+ ShouldNotReachHere(); -+ } -+ st->print("\t# vector spill size = %d", vsize); -+ } else { -+ st->print("\t# spill size = %d", is64 ? 64 : 32); -+ } -+ } ++operand immI_24() ++%{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ return 0; -+} ++operand immI_31() ++%{ ++ predicate(n->get_int() == 31); ++ match(ConI); + -+#ifndef PRODUCT -+void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ if (ra_ == NULL) { -+ st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx); -+ } else { -+ implementation(NULL, ra_, false, st); -+ } -+} -+#endif ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ implementation(&cbuf, ra_, false, NULL); -+} ++operand immI_63() ++%{ ++ predicate(n->get_int() == 63); ++ match(ConI); + -+uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { -+ return MachNode::size(ra_); -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+//============================================================================= ++// 32 bit integer valid for add immediate ++operand immIAdd() ++%{ ++ predicate(Assembler::operand_valid_for_add_immediate((int64_t)n->get_int())); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+#ifndef PRODUCT -+void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ assert_cond(ra_ != NULL && st != NULL); -+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); -+ int reg = ra_->get_reg_first(this); -+ st->print("add %s, sp, #%d\t# box lock", -+ Matcher::regName[reg], offset); -+} -+#endif ++// 32 bit integer valid for sub immediate ++operand immISub() ++%{ ++ predicate(Assembler::operand_valid_for_add_immediate(-(int64_t)n->get_int())); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ C2_MacroAssembler _masm(&cbuf); ++// 5 bit signed value. 
++operand immI5() ++%{ ++ predicate(n->get_int() <= 15 && n->get_int() >= -16); ++ match(ConI); + -+ assert_cond(ra_ != NULL); -+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); -+ int reg = ra_->get_encode(this); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ if (is_imm_in_range(offset, 12, 0)) { -+ __ addi(as_Register(reg), sp, offset); -+ } else if (is_imm_in_range(offset, 32, 0)) { -+ __ li32(t0, offset); -+ __ add(as_Register(reg), sp, t0); -+ } else { -+ ShouldNotReachHere(); -+ } -+} ++// 5 bit signed value (simm5) ++operand immL5() ++%{ ++ predicate(n->get_long() <= 15 && n->get_long() >= -16); ++ match(ConL); + -+uint BoxLockNode::size(PhaseRegAlloc *ra_) const { -+ // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_). -+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ if (is_imm_in_range(offset, 12, 0)) { -+ return NativeInstruction::instruction_size; -+ } else { -+ return 3 * NativeInstruction::instruction_size; // lui + addiw + add; -+ } -+} ++// Integer operands 64 bit ++// 64 bit immediate ++operand immL() ++%{ ++ match(ConL); + -+//============================================================================= ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+#ifndef PRODUCT -+void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const -+{ -+ assert_cond(st != NULL); -+ st->print_cr("# MachUEPNode"); -+ if (UseCompressedClassPointers) { -+ st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); -+ if (CompressedKlassPointers::shift() != 0) { -+ st->print_cr("\tdecode_klass_not_null t0, t0"); -+ } -+ } else { -+ st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); -+ } -+ st->print_cr("\tbeq t0, t1, ic_hit"); -+ st->print_cr("\tj, SharedRuntime::_ic_miss_stub\t # Inline cache check"); -+ st->print_cr("\tic_hit:"); -+} -+#endif ++// 64 bit zero ++operand immL0() ++%{ ++ predicate(n->get_long() == 0); ++ match(ConL); + -+void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const -+{ -+ // This is the unverified entry point. -+ C2_MacroAssembler _masm(&cbuf); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ Label skip; -+ __ cmp_klass(j_rarg0, t1, t0, skip); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); -+ __ bind(skip); -+} ++// Pointer operands ++// Pointer Immediate ++operand immP() ++%{ ++ match(ConP); + -+uint MachUEPNode::size(PhaseRegAlloc* ra_) const -+{ -+ assert_cond(ra_ != NULL); -+ return MachNode::size(ra_); -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+// REQUIRED EMIT CODE ++// NULL Pointer Immediate ++operand immP0() ++%{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); + -+//============================================================================= ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+// Emit exception handler code. -+int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) -+{ -+ // la_patchable t0, #exception_blob_entry_point -+ // jr (offset)t0 -+ // or -+ // j #exception_blob_entry_point -+ // Note that the code buffer's insts_mark is always relative to insts. -+ // That's why we must use the macroassembler to generate a handler. 
-+ C2_MacroAssembler _masm(&cbuf); -+ address base = __ start_a_stub(size_exception_handler()); -+ if (base == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return 0; // CodeBuffer::expand failed -+ } -+ int offset = __ offset(); -+ __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); -+ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); -+ __ end_a_stub(); -+ return offset; -+} ++// Pointer Immediate One ++// this is used in object initialization (initial object header) ++operand immP_1() ++%{ ++ predicate(n->get_ptr() == 1); ++ match(ConP); + -+// Emit deopt handler code. -+int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) -+{ -+ // Note that the code buffer's insts_mark is always relative to insts. -+ // That's why we must use the macroassembler to generate a handler. -+ C2_MacroAssembler _masm(&cbuf); -+ address base = __ start_a_stub(size_deopt_handler()); -+ if (base == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return 0; // CodeBuffer::expand failed -+ } -+ int offset = __ offset(); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ __ auipc(ra, 0); -+ __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); ++// Polling Page Pointer Immediate ++operand immPollPage() ++%{ ++ predicate((address)n->get_ptr() == os::get_polling_page()); ++ match(ConP); + -+ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); -+ __ end_a_stub(); -+ return offset; ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+} -+// REQUIRED MATCHER CODE ++// Card Table Byte Map Base ++operand immByteMapBase() ++%{ ++ // Get base of card map ++ predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && ++ (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); ++ match(ConP); + -+//============================================================================= ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+const bool Matcher::match_rule_supported(int opcode) { -+ if (!has_match_rule(opcode)) { -+ return false; -+ } ++// Int Immediate: low 16-bit mask ++operand immI_16bits() ++%{ ++ predicate(n->get_int() == 0xFFFF); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ switch (opcode) { -+ case Op_CacheWB: // fall through -+ case Op_CacheWBPreSync: // fall through -+ case Op_CacheWBPostSync: -+ if (!VM_Version::supports_data_cache_line_flush()) { -+ return false; -+ } -+ break; ++operand immIpowerOf2() %{ ++ predicate(is_power_of_2((juint)(n->get_int()))); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ case Op_StrCompressedCopy: // fall through -+ case Op_StrInflatedCopy: // fall through -+ case Op_CountPositives: -+ return UseRVV; ++// Long Immediate: low 32-bit mask ++operand immL_32bits() ++%{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ case Op_EncodeISOArray: -+ return UseRVV && SpecialEncodeISOArray; ++// 64 bit unit decrement ++operand immL_M1() ++%{ ++ predicate(n->get_long() == -1); ++ match(ConL); + -+ case Op_PopCountI: -+ case Op_PopCountL: -+ return UsePopCountInstruction; ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ case Op_RotateRight: -+ case Op_RotateLeft: -+ case Op_CountLeadingZerosI: -+ case Op_CountLeadingZerosL: -+ case Op_CountTrailingZerosI: -+ case Op_CountTrailingZerosL: 
-+ return UseRVB; -+ } + -+ return true; // Per default match rules are supported. -+} ++// 32 bit offset of pc in thread anchor + -+// Identify extra cases that we might want to provide match rules for vector nodes and -+// other intrinsics guarded with vector length (vlen) and element type (bt). -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } ++operand immL_pc_off() ++%{ ++ predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) + ++ in_bytes(JavaFrameAnchor::last_Java_pc_offset())); ++ match(ConL); + -+ return op_vec_supported(opcode); -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { -+ return false; -+} ++// 64 bit integer valid for add immediate ++operand immLAdd() ++%{ ++ predicate(Assembler::operand_valid_for_add_immediate(n->get_long())); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+const RegMask* Matcher::predicate_reg_mask(void) { -+ return NULL; -+} ++// 64 bit integer valid for sub immediate ++operand immLSub() ++%{ ++ predicate(Assembler::operand_valid_for_add_immediate(-(n->get_long()))); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { -+ return NULL; -+} ++// Narrow pointer operands ++// Narrow Pointer Immediate ++operand immN() ++%{ ++ match(ConN); + -+// Vector calling convention not yet implemented. -+const bool Matcher::supports_vector_calling_convention(void) { -+ return false; -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+OptoRegPair Matcher::vector_return_value(uint ideal_reg) { -+ Unimplemented(); -+ return OptoRegPair(0, 0); -+} ++// Narrow NULL Pointer Immediate ++operand immN0() ++%{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); + -+// Is this branch offset short enough that a short branch can be used? -+// -+// NOTE: If the platform does not provide any short branch variants, then -+// this method should return false for offset 0. -+// |---label(L1)-----| -+// |-----------------| -+// |-----------------|----------eq: float------------------- -+// |-----------------| // far_cmpD_branch | cmpD_branch -+// |------- ---------| feq; | feq; -+// |-far_cmpD_branch-| beqz done; | bnez L; -+// |-----------------| j L; | -+// |-----------------| bind(done); | -+// |-----------------|-------------------------------------- -+// |-----------------| // so shortBrSize = br_size - 4; -+// |-----------------| // so offs = offset - shortBrSize + 4; -+// |---label(L2)-----| -+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { -+ // The passed offset is relative to address of the branch. -+ int shortBrSize = br_size - 4; -+ int offs = offset - shortBrSize + 4; -+ return (-4096 <= offs && offs < 4096); -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+// Vector width in bytes. -+const int Matcher::vector_width_in_bytes(BasicType bt) { -+ if (UseRVV) { -+ // The MaxVectorSize should have been set by detecting RVV max vector register size when check UseRVV. -+ // MaxVectorSize == VM_Version::_initial_vector_length -+ return MaxVectorSize; -+ } -+ return 0; -+} ++operand immNKlass() ++%{ ++ match(ConNKlass); + -+// Limits on vector size (number of elements) loaded into vector. 
-+const int Matcher::max_vector_size(const BasicType bt) { -+ return vector_width_in_bytes(bt) / type2aelembytes(bt); -+} -+const int Matcher::min_vector_size(const BasicType bt) { -+ return max_vector_size(bt); -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+// Vector ideal reg. -+const uint Matcher::vector_ideal_reg(int len) { -+ assert(MaxVectorSize >= len, ""); -+ if (UseRVV) { -+ return Op_VecA; -+ } ++// Float and Double operands ++// Double Immediate ++operand immD() ++%{ ++ match(ConD); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+ ShouldNotReachHere(); -+ return 0; -+} ++// Double Immediate: +0.0d ++operand immD0() ++%{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); + -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return Matcher::max_vector_size(bt); -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) { -+ ShouldNotReachHere(); // generic vector operands not supported -+ return NULL; -+} ++// Float Immediate ++operand immF() ++%{ ++ match(ConF); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+bool Matcher::is_reg2reg_move(MachNode* m) { -+ ShouldNotReachHere(); // generic vector operands not supported -+ return false; -+} ++// Float Immediate: +0.0f. ++operand immF0() ++%{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); + -+bool Matcher::is_generic_vector(MachOper* opnd) { -+ ShouldNotReachHere(); // generic vector operands not supported -+ return false; -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+// Return whether or not this register is ever used as an argument. -+// This function is used on startup to build the trampoline stubs in -+// generateOptoStub. Registers not mentioned will be killed by the VM -+// call in the trampoline, and arguments in those registers not be -+// available to the callee. -+bool Matcher::can_be_java_arg(int reg) -+{ -+ return -+ reg == R10_num || reg == R10_H_num || -+ reg == R11_num || reg == R11_H_num || -+ reg == R12_num || reg == R12_H_num || -+ reg == R13_num || reg == R13_H_num || -+ reg == R14_num || reg == R14_H_num || -+ reg == R15_num || reg == R15_H_num || -+ reg == R16_num || reg == R16_H_num || -+ reg == R17_num || reg == R17_H_num || -+ reg == F10_num || reg == F10_H_num || -+ reg == F11_num || reg == F11_H_num || -+ reg == F12_num || reg == F12_H_num || -+ reg == F13_num || reg == F13_H_num || -+ reg == F14_num || reg == F14_H_num || -+ reg == F15_num || reg == F15_H_num || -+ reg == F16_num || reg == F16_H_num || -+ reg == F17_num || reg == F17_H_num; -+} ++operand immIOffset() ++%{ ++ predicate(is_imm_in_range(n->get_int(), 12, 0)); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+bool Matcher::is_spillable_arg(int reg) -+{ -+ return can_be_java_arg(reg); -+} ++operand immLOffset() ++%{ ++ predicate(is_imm_in_range(n->get_long(), 12, 0)); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+uint Matcher::int_pressure_limit() -+{ -+ // A derived pointer is live at CallNode and then is flagged by RA -+ // as a spilled LRG. Spilling heuristics(Spill-USE) explicitly skip -+ // derived pointers and lastly fail to spill after reaching maximum -+ // number of iterations. 
Lowering the default pressure threshold to -+ // (_NO_SPECIAL_REG32_mask.Size() minus 1) forces CallNode to become -+ // a high register pressure area of the code so that split_DEF can -+ // generate DefinitionSpillCopy for the derived pointer. -+ uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.Size() - 1; -+ if (!PreserveFramePointer) { -+ // When PreserveFramePointer is off, frame pointer is allocatable, -+ // but different from other SOC registers, it is excluded from -+ // fatproj's mask because its save type is No-Save. Decrease 1 to -+ // ensure high pressure at fatproj when PreserveFramePointer is off. -+ // See check_pressure_at_fatproj(). -+ default_int_pressure_threshold--; -+ } -+ return (INTPRESSURE == -1) ? default_int_pressure_threshold : INTPRESSURE; -+} -+ -+uint Matcher::float_pressure_limit() -+{ -+ // _FLOAT_REG_mask is generated by adlc from the float_reg register class. -+ return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.Size() : FLOATPRESSURE; -+} ++// Scale values ++operand immIScale() ++%{ ++ predicate(1 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); + -+bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { -+ return false; -+} ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+RegMask Matcher::divI_proj_mask() { -+ ShouldNotReachHere(); -+ return RegMask(); -+} ++// Integer 32 bit Register Operands ++operand iRegI() ++%{ ++ constraint(ALLOC_IN_RC(any_reg32)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+// Register for MODI projection of divmodI. -+RegMask Matcher::modI_proj_mask() { -+ ShouldNotReachHere(); -+ return RegMask(); -+} ++// Integer 32 bit Register not Special ++operand iRegINoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_reg32)); ++ match(RegI); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+// Register for DIVL projection of divmodL. -+RegMask Matcher::divL_proj_mask() { -+ ShouldNotReachHere(); -+ return RegMask(); -+} ++// Register R10 only ++operand iRegI_R10() ++%{ ++ constraint(ALLOC_IN_RC(int_r10_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+// Register for MODL projection of divmodL. -+RegMask Matcher::modL_proj_mask() { -+ ShouldNotReachHere(); -+ return RegMask(); -+} ++// Register R12 only ++operand iRegI_R12() ++%{ ++ constraint(ALLOC_IN_RC(int_r12_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+const RegMask Matcher::method_handle_invoke_SP_save_mask() { -+ return FP_REG_mask(); -+} ++// Register R13 only ++operand iRegI_R13() ++%{ ++ constraint(ALLOC_IN_RC(int_r13_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+bool size_fits_all_mem_uses(AddPNode* addp, int shift) { -+ assert_cond(addp != NULL); -+ for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) { -+ Node* u = addp->fast_out(i); -+ if (u != NULL && u->is_Mem()) { -+ int opsize = u->as_Mem()->memory_size(); -+ assert(opsize > 0, "unexpected memory operand size"); -+ if (u->as_Mem()->memory_size() != (1 << shift)) { -+ return false; -+ } -+ } -+ } -+ return true; -+} ++// Register R14 only ++operand iRegI_R14() ++%{ ++ constraint(ALLOC_IN_RC(int_r14_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+// Should the Matcher clone input 'm' of node 'n'? 
-+bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { -+ assert_cond(m != NULL); -+ if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) -+ mstack.push(m, Visit); // m = ShiftCntV -+ return true; -+ } -+ return false; -+} ++// Integer 64 bit Register Operands ++operand iRegL() ++%{ ++ constraint(ALLOC_IN_RC(any_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+// Should the Matcher clone shifts on addressing modes, expecting them -+// to be subsumed into complex addressing expressions or compute them -+// into registers? -+bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { -+ return clone_base_plus_offset_address(m, mstack, address_visited); -+} ++// Integer 64 bit Register not Special ++operand iRegLNoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_reg)); ++ match(RegL); ++ match(iRegL_R10); ++ format %{ %} ++ interface(REG_INTER); ++%} + ++// Long 64 bit Register R28 only ++operand iRegL_R28() ++%{ ++ constraint(ALLOC_IN_RC(r28_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + ++// Long 64 bit Register R29 only ++operand iRegL_R29() ++%{ ++ constraint(ALLOC_IN_RC(r29_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + ++// Long 64 bit Register R30 only ++operand iRegL_R30() ++%{ ++ constraint(ALLOC_IN_RC(r30_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+//----------ENCODING BLOCK----------------------------------------------------- -+// This block specifies the encoding classes used by the compiler to -+// output byte streams. Encoding classes are parameterized macros -+// used by Machine Instruction Nodes in order to generate the bit -+// encoding of the instruction. Operands specify their base encoding -+// interface with the interface keyword. There are currently -+// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & -+// COND_INTER. REG_INTER causes an operand to generate a function -+// which returns its register number when queried. CONST_INTER causes -+// an operand to generate a function which returns the value of the -+// constant when queried. MEMORY_INTER causes an operand to generate -+// four functions which return the Base Register, the Index Register, -+// the Scale Value, and the Offset Value of the operand when queried. -+// COND_INTER causes an operand to generate six functions which return -+// the encoding code (ie - encoding bits for the instruction) -+// associated with each basic boolean condition for a conditional -+// instruction. -+// -+// Instructions specify two basic values for encoding. Again, a -+// function is available to check if the constant displacement is an -+// oop. They use the ins_encode keyword to specify their encoding -+// classes (which must be a sequence of enc_class names, and their -+// parameters, specified in the encoding block), and they use the -+// opcode keyword to specify, in order, their primary, secondary, and -+// tertiary opcode. Only the opcode sections which a particular -+// instruction needs for encoding need to be specified. 
-+encode %{ -+ // BEGIN Non-volatile memory access ++// Pointer Register Operands ++// Pointer Register ++operand iRegP() ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ match(iRegP_R10); ++ match(javaThread_RegP); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Assembler::CompressibleRegion cr(&_masm); -+ int64_t con = (int64_t)$src$$constant; -+ Register dst_reg = as_Register($dst$$reg); -+ __ li(dst_reg, con); -+ %} ++// Pointer 64 bit Register not Special ++operand iRegPNoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_ptr_reg)); ++ match(RegP); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ address con = (address)$src$$constant; -+ if (con == NULL || con == (address)1) { -+ ShouldNotReachHere(); -+ } else { -+ relocInfo::relocType rtype = $src->constant_reloc(); -+ if (rtype == relocInfo::oop_type) { -+ __ movoop(dst_reg, (jobject)con, /*immediate*/true); -+ } else if (rtype == relocInfo::metadata_type) { -+ __ mov_metadata(dst_reg, (Metadata*)con); -+ } else { -+ assert(rtype == relocInfo::none, "unexpected reloc type"); -+ __ li(dst_reg, $src$$constant); -+ } -+ } -+ %} ++operand iRegP_R10() ++%{ ++ constraint(ALLOC_IN_RC(r10_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ enc_class riscv_enc_mov_p1(iRegP dst) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Assembler::CompressibleRegion cr(&_masm); -+ Register dst_reg = as_Register($dst$$reg); -+ __ li(dst_reg, 1); -+ %} ++// Pointer 64 bit Register R11 only ++operand iRegP_R11() ++%{ ++ constraint(ALLOC_IN_RC(r11_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ __ load_byte_map_base($dst$$Register); -+ %} ++operand iRegP_R12() ++%{ ++ constraint(ALLOC_IN_RC(r12_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ address con = (address)$src$$constant; -+ if (con == NULL) { -+ ShouldNotReachHere(); -+ } else { -+ relocInfo::relocType rtype = $src->constant_reloc(); -+ assert(rtype == relocInfo::oop_type, "unexpected reloc type"); -+ __ set_narrow_oop(dst_reg, (jobject)con); -+ } -+ %} ++// Pointer 64 bit Register R13 only ++operand iRegP_R13() ++%{ ++ constraint(ALLOC_IN_RC(r13_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ enc_class riscv_enc_mov_zero(iRegNorP dst) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ __ mv(dst_reg, zr); -+ %} ++operand iRegP_R14() ++%{ ++ constraint(ALLOC_IN_RC(r14_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ address con = (address)$src$$constant; -+ if (con == NULL) { -+ ShouldNotReachHere(); -+ } else { -+ relocInfo::relocType rtype = $src->constant_reloc(); 
-+ assert(rtype == relocInfo::metadata_type, "unexpected reloc type"); -+ __ set_narrow_klass(dst_reg, (Klass *)con); -+ } ++operand iRegP_R15() ++%{ ++ constraint(ALLOC_IN_RC(r15_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand iRegP_R16() ++%{ ++ constraint(ALLOC_IN_RC(r16_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer 64 bit Register R28 only ++operand iRegP_R28() ++%{ ++ constraint(ALLOC_IN_RC(r28_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register Operands ++// Narrow Pointer Register ++operand iRegN() ++%{ ++ constraint(ALLOC_IN_RC(any_reg32)); ++ match(RegN); ++ match(iRegNNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Integer 64 bit Register not Special ++operand iRegNNoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_reg32)); ++ match(RegN); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// heap base register -- used for encoding immN0 ++operand iRegIHeapbase() ++%{ ++ constraint(ALLOC_IN_RC(heapbase_reg)); ++ match(RegI); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Long 64 bit Register R10 only ++operand iRegL_R10() ++%{ ++ constraint(ALLOC_IN_RC(r10_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Float Register ++// Float register operands ++operand fRegF() ++%{ ++ constraint(ALLOC_IN_RC(float_reg)); ++ match(RegF); ++ ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Double Register ++// Double register operands ++operand fRegD() ++%{ ++ constraint(ALLOC_IN_RC(double_reg)); ++ match(RegD); ++ ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Java Thread Register ++operand javaThread_RegP(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(java_thread_reg)); // java_thread_reg ++ match(reg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// RISCV has only base_plus_offset and literal address mode, so no need to use ++// index and scale. Here set index as 0xffffffff and scale as 0x0. 
++operand indirect(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(reg); ++ op_cost(0); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); + %} ++%} + -+ enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); ++operand indOffI(iRegP reg, immIOffset off) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} ++%} + -+ enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); ++operand indOffL(iRegP reg, immLOffset off) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} ++%} + -+ enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); ++operand indirectN(iRegN reg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(DecodeN reg); ++ op_cost(0); ++ format %{ "[$reg]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); + %} ++%} + -+ enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); ++operand indOffIN(iRegN reg, immIOffset off) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (DecodeN reg) off); ++ op_cost(0); ++ format %{ "[$reg, $off]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} ++%} + -+ enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); ++operand indOffLN(iRegN reg, immLOffset off) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (DecodeN reg) off); ++ op_cost(0); ++ format %{ "[$reg, $off]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} ++%} + -+ enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ 
__ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result as bool*/ true); ++// RISCV opto stubs need to write to the pc slot in the thread anchor ++operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} ++%} + -+ // compare and branch instruction encodings + -+ enc_class riscv_enc_j(label lbl) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Label* L = $lbl$$label; -+ __ j(*L); ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. ++operand stackSlotI(sRegI reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegI); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset + %} ++%} + -+ enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Label* L = $lbl$$label; -+ switch ($cmp$$cmpcode) { -+ case(BoolTest::ge): -+ __ j(*L); -+ break; -+ case(BoolTest::lt): -+ break; -+ default: -+ Unimplemented(); -+ } ++operand stackSlotF(sRegF reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegF); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset + %} ++%} + -+ // call instruction encodings -+ -+ enc_class riscv_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) %{ -+ Register sub_reg = as_Register($sub$$reg); -+ Register super_reg = as_Register($super$$reg); -+ Register temp_reg = as_Register($temp$$reg); -+ Register result_reg = as_Register($result$$reg); -+ Register cr_reg = t1; ++operand stackSlotD(sRegD reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegD); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} + -+ Label miss; -+ Label done; -+ C2_MacroAssembler _masm(&cbuf); -+ __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, -+ NULL, &miss); -+ if ($primary) { -+ __ mv(result_reg, zr); -+ } else { -+ __ mv(cr_reg, zr); -+ __ j(done); -+ } ++operand stackSlotL(sRegL reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegL); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} + -+ __ bind(miss); -+ if (!$primary) { -+ __ li(cr_reg, 1); -+ } ++// Special operand allowing long args to int ops to be truncated for free + -+ __ bind(done); -+ %} ++operand iRegL2I(iRegL reg) %{ + -+ enc_class riscv_enc_java_static_call(method meth) %{ -+ C2_MacroAssembler 
_masm(&cbuf); ++ op_cost(0); + -+ address addr = (address)$meth$$method; -+ address call = NULL; -+ assert_cond(addr != NULL); -+ if (!_method) { -+ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. -+ call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf); -+ if (call == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } -+ } else { -+ int method_index = resolved_method_index(cbuf); -+ RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) -+ : static_call_Relocation::spec(method_index); -+ call = __ trampoline_call(Address(addr, rspec), &cbuf); -+ if (call == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ match(ConvL2I reg); + -+ // Emit stub for static call -+ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); -+ if (stub == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } -+ } -+ %} ++ format %{ "l2i($reg)" %} + -+ enc_class riscv_enc_java_dynamic_call(method meth) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ int method_index = resolved_method_index(cbuf); -+ address call = __ ic_call((address)$meth$$method, method_index); -+ if (call == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } -+ %} ++ interface(REG_INTER) ++%} + -+ enc_class riscv_enc_call_epilog() %{ -+ C2_MacroAssembler _masm(&cbuf); -+ if (VerifyStackAtCalls) { -+ // Check that stack depth is unchanged: find majik cookie on stack -+ __ call_Unimplemented(); -+ } -+ %} + -+ enc_class riscv_enc_java_to_runtime(method meth) %{ -+ C2_MacroAssembler _masm(&cbuf); ++// Comparison Operands ++// NOTE: Label is a predefined operand which should not be redefined in ++// the AD file. It is generically handled within the ADLC. + -+ // some calls to generated routines (arraycopy code) are scheduled -+ // by C2 as runtime calls. if so we can call them using a jr (they -+ // will be in a reachable segment) otherwise we have to use a jalr -+ // which loads the absolute address into a register. -+ address entry = (address)$meth$$method; -+ CodeBlob *cb = CodeCache::find_blob(entry); -+ if (cb != NULL) { -+ address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type)); -+ if (call == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } -+ } else { -+ Label retaddr; -+ __ la(t1, retaddr); -+ __ la(t0, RuntimeAddress(entry)); -+ // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc() -+ __ addi(sp, sp, -2 * wordSize); -+ __ sd(t1, Address(sp, wordSize)); -+ __ jalr(t0); -+ __ bind(retaddr); -+ __ addi(sp, sp, 2 * wordSize); -+ } -+ %} -+ -+ // using the cr register as the bool result: 0 for success; others failed. -+ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register flag = t1; -+ Register oop = as_Register($object$$reg); -+ Register box = as_Register($box$$reg); -+ Register disp_hdr = as_Register($tmp1$$reg); -+ Register tmp = as_Register($tmp2$$reg); -+ Label cont; -+ Label object_has_monitor; -+ -+ assert_different_registers(oop, box, tmp, disp_hdr, t0); -+ -+ // Load markWord from object into displaced_header. 
-+ __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); -+ -+ if (DiagnoseSyncOnValueBasedClasses != 0) { -+ __ load_klass(flag, oop); -+ __ lwu(flag, Address(flag, Klass::access_flags_offset())); -+ __ andi(flag, flag, JVM_ACC_IS_VALUE_BASED_CLASS, tmp /* tmp */); -+ __ bnez(flag, cont, true /* is_far */); -+ } -+ -+ // Check for existing monitor -+ __ andi(t0, disp_hdr, markWord::monitor_value); -+ __ bnez(t0, object_has_monitor); -+ -+ if (!UseHeavyMonitors) { -+ // Set tmp to be (markWord of object | UNLOCK_VALUE). -+ __ ori(tmp, disp_hdr, markWord::unlocked_value); -+ -+ // Initialize the box. (Must happen before we update the object mark!) -+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ -+ // Compare object markWord with an unlocked value (tmp) and if -+ // equal exchange the stack address of our box with object markWord. -+ // On failure disp_hdr contains the possibly locked markWord. -+ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/disp_hdr); -+ __ mv(flag, zr); -+ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas -+ -+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -+ -+ // If the compare-and-exchange succeeded, then we found an unlocked -+ // object, will have now locked it will continue at label cont -+ // We did not see an unlocked object so try the fast recursive case. -+ -+ // Check if the owner is self by comparing the value in the -+ // markWord of object (disp_hdr) with the stack pointer. -+ __ sub(disp_hdr, disp_hdr, sp); -+ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place)); -+ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, -+ // hence we can store 0 as the displaced header in the box, which indicates that it is a -+ // recursive lock. -+ __ andr(tmp/*==0?*/, disp_hdr, tmp); -+ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ __ mv(flag, tmp); // we can use the value of tmp as the result here -+ } else { -+ __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path -+ } -+ -+ __ j(cont); -+ -+ // Handle existing monitor. -+ __ bind(object_has_monitor); -+ // The object's monitor m is unlocked iff m->owner == NULL, -+ // otherwise m->owner may contain a thread or a stack address. -+ // -+ // Try to CAS m->owner from NULL to current thread. -+ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markWord::monitor_value)); -+ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) -+ -+ // Store a non-null value into the box to avoid looking like a re-entrant -+ // lock. The fast-path monitor unlock code checks for -+ // markWord::monitor_value so use markWord::unused_mark which has the -+ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. 
-+ __ mv(tmp, (address)markWord::unused_mark().value()); -+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. + -+ __ beqz(flag, cont); // CAS success means locking succeeded + -+ __ bne(flag, xthread, cont); // Check for recursive locking ++// used for signed integral comparisons and fp comparisons ++operand cmpOp() ++%{ ++ match(Bool); + -+ // Recursive lock case -+ __ mv(flag, zr); -+ __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); -+ __ add(tmp, tmp, 1u); -+ __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); ++ format %{ "" %} + -+ __ bind(cont); ++ // the values in interface derives from struct BoolTest::mask ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); + %} ++%} + -+ // using cr flag to indicate the fast_unlock result: 0 for success; others failed. -+ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register flag = t1; -+ Register oop = as_Register($object$$reg); -+ Register box = as_Register($box$$reg); -+ Register disp_hdr = as_Register($tmp1$$reg); -+ Register tmp = as_Register($tmp2$$reg); -+ Label cont; -+ Label object_has_monitor; -+ -+ assert_different_registers(oop, box, tmp, disp_hdr, flag); -+ -+ if (!UseHeavyMonitors) { -+ // Find the lock address and load the displaced header from the stack. -+ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ -+ // If the displaced header is 0, we have a recursive unlock. -+ __ mv(flag, disp_hdr); -+ __ beqz(disp_hdr, cont); -+ } -+ -+ // Handle existing monitor. -+ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); -+ __ andi(t0, disp_hdr, markWord::monitor_value); -+ __ bnez(t0, object_has_monitor); -+ -+ if (!UseHeavyMonitors) { -+ // Check if it is still a light weight lock, this is true if we -+ // see the stack address of the basicLock in the markWord of the -+ // object. -+ -+ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, -+ Assembler::rl, /*result*/tmp); -+ __ xorr(flag, box, tmp); // box == tmp if cas succeeds -+ } else { -+ __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path -+ } -+ __ j(cont); -+ -+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -+ -+ // Handle existing monitor. 
-+ __ bind(object_has_monitor); -+ STATIC_ASSERT(markWord::monitor_value <= INT_MAX); -+ __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -+ -+ Label notRecursive; -+ __ beqz(disp_hdr, notRecursive); // Will be 0 if not recursive. -+ -+ // Recursive lock -+ __ addi(disp_hdr, disp_hdr, -1); -+ __ sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -+ __ mv(flag, zr); -+ __ j(cont); -+ -+ __ bind(notRecursive); -+ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); -+ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. -+ __ bnez(flag, cont); -+ // need a release store here -+ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sd(zr, Address(tmp)); // set unowned ++// used for unsigned integral comparisons ++operand cmpOpU() ++%{ ++ match(Bool); + -+ __ bind(cont); ++ format %{ "" %} ++ // the values in interface derives from struct BoolTest::mask ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gtu"); ++ overflow(0x2, "overflow"); ++ less(0x3, "ltu"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "leu"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "geu"); + %} ++%} + -+ // arithmetic encodings -+ -+ enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivl(dst_reg, src1_reg, src2_reg, false); -+ %} ++// used for certain integral comparisons which can be ++// converted to bxx instructions ++operand cmpOpEqNe() ++%{ ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne || ++ n->as_Bool()->_test._test == BoolTest::eq); + -+ enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivq(dst_reg, src1_reg, src2_reg, false); ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); + %} ++%} + -+ enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivl(dst_reg, src1_reg, src2_reg, true); -+ %} ++operand cmpOpULtGe() ++%{ ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::lt || ++ n->as_Bool()->_test._test == BoolTest::ge); + -+ enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivq(dst_reg, src1_reg, src2_reg, true); ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gtu"); ++ overflow(0x2, "overflow"); ++ less(0x3, "ltu"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "leu"); ++ 
no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "geu"); + %} ++%} + -+ enc_class riscv_enc_tail_call(iRegP jump_target) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Assembler::CompressibleRegion cr(&_masm); -+ Register target_reg = as_Register($jump_target$$reg); -+ __ jr(target_reg); -+ %} ++operand cmpOpUEqNeLeGt() ++%{ ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne || ++ n->as_Bool()->_test._test == BoolTest::eq || ++ n->as_Bool()->_test._test == BoolTest::le || ++ n->as_Bool()->_test._test == BoolTest::gt); + -+ enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Assembler::CompressibleRegion cr(&_masm); -+ Register target_reg = as_Register($jump_target$$reg); -+ // exception oop should be in x10 -+ // ret addr has been popped into ra -+ // callee expects it in x13 -+ __ mv(x13, ra); -+ __ jr(target_reg); ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gtu"); ++ overflow(0x2, "overflow"); ++ less(0x3, "ltu"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "leu"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "geu"); + %} ++%} + -+ enc_class riscv_enc_rethrow() %{ -+ C2_MacroAssembler _masm(&cbuf); -+ __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); -+ %} + -+ enc_class riscv_enc_ret() %{ -+ C2_MacroAssembler _masm(&cbuf); -+ Assembler::CompressibleRegion cr(&_masm); -+ __ ret(); -+ %} ++// Flags register, used as output of compare logic ++operand rFlagsReg() ++%{ ++ constraint(ALLOC_IN_RC(reg_flags)); ++ match(RegFlags); + ++ op_cost(0); ++ format %{ "RFLAGS" %} ++ interface(REG_INTER); +%} + -+//----------FRAME-------------------------------------------------------------- -+// Definition of frame structure and management information. -+// -+// S T A C K L A Y O U T Allocators stack-slot number -+// | (to get allocators register number -+// G Owned by | | v add OptoReg::stack0()) -+// r CALLER | | -+// o | +--------+ pad to even-align allocators stack-slot -+// w V | pad0 | numbers; owned by CALLER -+// t -----------+--------+----> Matcher::_in_arg_limit, unaligned -+// h ^ | in | 5 -+// | | args | 4 Holes in incoming args owned by SELF -+// | | | | 3 -+// | | +--------+ -+// V | | old out| Empty on Intel, window on Sparc -+// | old |preserve| Must be even aligned. -+// | SP-+--------+----> Matcher::_old_SP, even aligned -+// | | in | 3 area for Intel ret address -+// Owned by |preserve| Empty on Sparc. -+// SELF +--------+ -+// | | pad2 | 2 pad to align old SP -+// | +--------+ 1 -+// | | locks | 0 -+// | +--------+----> OptoReg::stack0(), even aligned -+// | | pad1 | 11 pad to align new SP -+// | +--------+ -+// | | | 10 -+// | | spills | 9 spills -+// V | | 8 (pad0 slot for callee) -+// -----------+--------+----> Matcher::_out_arg_limit, unaligned -+// ^ | out | 7 -+// | | args | 6 Holes in outgoing args owned by CALLEE -+// Owned by +--------+ -+// CALLEE | new out| 6 Empty on Intel, window on Sparc -+// | new |preserve| Must be even-aligned. -+// | SP-+--------+----> Matcher::_new_SP, even aligned -+// | | | -+// -+// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is -+// known from SELF's arguments and the Java calling convention. -+// Region 6-7 is determined per call site. -+// Note 2: If the calling convention leaves holes in the incoming argument -+// area, those holes are owned by SELF. Holes in the outgoing area -+// are owned by the CALLEE. 
Holes should not be nessecary in the -+// incoming area, as the Java calling convention is completely under -+// the control of the AD file. Doubles can be sorted and packed to -+// avoid holes. Holes in the outgoing arguments may be nessecary for -+// varargs C calling conventions. -+// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is -+// even aligned with pad0 as needed. -+// Region 6 is even aligned. Region 6-7 is NOT even aligned; -+// (the latter is true on Intel but is it false on RISCV?) -+// region 6-11 is even aligned; it may be padded out more so that -+// the region from SP to FP meets the minimum stack alignment. -+// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack -+// alignment. Region 11, pad1, may be dynamically extended so that -+// SP meets the minimum alignment. ++// Special Registers + -+frame %{ -+ // These three registers define part of the calling convention -+ // between compiled code and the interpreter. ++// Method Register ++operand inline_cache_RegP(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg ++ match(reg); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ // Inline Cache Register or methodOop for I2C. -+ inline_cache_reg(R31); ++//----------OPERAND CLASSES---------------------------------------------------- ++// Operand Classes are groups of operands that are used as to simplify ++// instruction definitions by not requiring the AD writer to specify ++// separate instructions for every form of operand when the ++// instruction accepts multiple operand types with the same basic ++// encoding and format. The classic case of this is memory operands. + -+ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] -+ cisc_spilling_operand_name(indOffset); ++// memory is used to define read/write location for load/store ++// instruction defs. we can turn a memory op into an Address + -+ // Number of stack slots consumed by locking an object -+ // generate Compile::sync_stack_slots -+ // VMRegImpl::slots_per_word = wordSize / stack_slot_size = 8 / 4 = 2 -+ sync_stack_slots(1 * VMRegImpl::slots_per_word); ++opclass memory(indirect, indOffI, indOffL, indirectN, indOffIN, indOffLN); + -+ // Compiled code's Frame Pointer -+ frame_pointer(R2); ++// iRegIorL2I is used for src inputs in rules for 32 bit int (I) ++// operations. it allows the src to be either an iRegI or a (ConvL2I ++// iRegL). in the latter case the l2i normally planted for a ConvL2I ++// can be elided because the 32-bit instruction will just employ the ++// lower 32 bits anyway. ++// ++// n.b. this does not elide all L2I conversions. if the truncated ++// value is consumed by more than one operation then the ConvL2I ++// cannot be bundled into the consuming nodes so an l2i gets planted ++// (actually a mvw $dst $src) and the downstream instructions consume ++// the result of the l2i as an iRegI input. That's a shame since the ++// mvw is actually redundant but its not too costly. + -+ // Interpreter stores its frame pointer in a register which is -+ // stored to the stack by I2CAdaptors. -+ // I2CAdaptors convert from interpreted java to compiled java. 
-+ interpreter_frame_pointer(R8); ++opclass iRegIorL2I(iRegI, iRegL2I); ++opclass iRegIorL(iRegI, iRegL); ++opclass iRegNorP(iRegN, iRegP); ++opclass iRegILNP(iRegI, iRegL, iRegN, iRegP); ++opclass iRegILNPNoSp(iRegINoSp, iRegLNoSp, iRegNNoSp, iRegPNoSp); ++opclass immIorL(immI, immL); + -+ // Stack alignment requirement -+ stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. + -+ // Number of outgoing stack slots killed above the out_preserve_stack_slots -+ // for calls to C. Supports the var-args backing area for register parms. -+ varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); ++// For specific pipelines, e.g. generic RISC-V, define the stages of that pipeline ++//pipe_desc(ID, EX, MEM, WR); ++#define ID S0 ++#define EX S1 ++#define MEM S2 ++#define WR S3 + -+ // The after-PROLOG location of the return address. Location of -+ // return address specifies a type (REG or STACK) and a number -+ // representing the register number (i.e. - use a register name) or -+ // stack slot. -+ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. -+ // Otherwise, it is above the locks and verification slot and alignment word -+ // TODO this may well be correct but need to check why that - 2 is there -+ // ppc port uses 0 but we definitely need to allow for fixed_slots -+ // which folds in the space used for monitors -+ return_addr(STACK - 2 + -+ align_up((Compile::current()->in_preserve_stack_slots() + -+ Compile::current()->fixed_slots()), -+ stack_alignment_in_slots())); ++// Integer ALU reg operation ++pipeline %{ + -+ // Location of compiled Java return values. Same as C for now. -+ return_value -+ %{ -+ assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, -+ "only return normal values"); ++attributes %{ ++ // RISC-V instructions are of fixed length ++ fixed_size_instructions; // Fixed size instructions TODO does ++ max_instructions_per_bundle = 2; // Generic RISC-V 1, Sifive Series 7 2 ++ // RISC-V instructions come in 32-bit word units ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 64; // The processor fetches one line ++ instruction_fetch_units = 1; // of 64 bytes + -+ static const int lo[Op_RegL + 1] = { // enum name -+ 0, // Op_Node -+ 0, // Op_Set -+ R10_num, // Op_RegN -+ R10_num, // Op_RegI -+ R10_num, // Op_RegP -+ F10_num, // Op_RegF -+ F10_num, // Op_RegD -+ R10_num // Op_RegL -+ }; ++ // List of nop instructions ++ nops( MachNop ); ++%} + -+ static const int hi[Op_RegL + 1] = { // enum name -+ 0, // Op_Node -+ 0, // Op_Set -+ OptoReg::Bad, // Op_RegN -+ OptoReg::Bad, // Op_RegI -+ R10_H_num, // Op_RegP -+ OptoReg::Bad, // Op_RegF -+ F10_H_num, // Op_RegD -+ R10_H_num // Op_RegL -+ }; ++// We don't use an actual pipeline model so don't care about resources ++// or description. 
we do use pipeline classes to introduce fixed ++// latencies + -+ return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); -+ %} -+%} ++//----------RESOURCES---------------------------------------------------------- ++// Resources are the functional units available to the machine + -+//----------ATTRIBUTES--------------------------------------------------------- -+//----------Operand Attributes------------------------------------------------- -+op_attrib op_cost(1); // Required cost attribute ++// Generic RISC-V pipeline ++// 1 decoder ++// 1 instruction decoded per cycle ++// 1 load/store ops per cycle, 1 branch, 1 FPU ++// 1 mul, 1 div + -+//----------Instruction Attributes--------------------------------------------- -+ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute -+ins_attrib ins_size(32); // Required size attribute (in bits) -+ins_attrib ins_short_branch(0); // Required flag: is this instruction -+ // a non-matching short branch variant -+ // of some long branch? -+ins_attrib ins_alignment(4); // Required alignment attribute (must -+ // be a power of 2) specifies the -+ // alignment that some part of the -+ // instruction (not necessarily the -+ // start) requires. If > 1, a -+ // compute_padding() function must be -+ // provided for the instruction ++resources ( DECODE, ++ ALU, ++ MUL, ++ DIV, ++ BRANCH, ++ LDST, ++ FPU); + -+//----------OPERANDS----------------------------------------------------------- -+// Operand definitions must precede instruction definitions for correct parsing -+// in the ADLC because operands constitute user defined types which are used in -+// instruction definitions. ++//----------PIPELINE DESCRIPTION----------------------------------------------- ++// Pipeline Description specifies the stages in the machine's pipeline + -+//----------Simple Operands---------------------------------------------------- ++// Define the pipeline as a generic 6 stage pipeline ++pipe_desc(S0, S1, S2, S3, S4, S5); + -+// Integer operands 32 bit -+// 32 bit immediate -+operand immI() -+%{ -+ match(ConI); ++//----------PIPELINE CLASSES--------------------------------------------------- ++// Pipeline Classes describe the stages in which input and output are ++// referenced by the hardware pipeline. 
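For readability, an annotated copy of the integer ALU class defined further down (purely illustrative; the name ialu_reg_reg_annotated is a placeholder and nothing references it), showing how the ID/EX/MEM/WR aliases above express when operands are read and written:

    // Annotated restatement of ialu_reg_reg (see below); ID/EX alias S0/S1.
    pipe_class ialu_reg_reg_annotated(iRegI dst, iRegI src1, iRegI src2)
    %{
      single_instruction;     // occupies a single issue slot
      dst    : EX(write);     // result becomes available after execute (S1)
      src1   : ID(read);      // both sources must be ready at decode (S0)
      src2   : ID(read);
      DECODE : ID;            // uses the decoder resource in ID
      ALU    : EX;            // and the ALU resource in EX
    %}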
+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++pipe_class fp_dop_reg_reg_s(fRegF dst, fRegF src1, fRegF src2) ++%{ ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// 32 bit zero -+operand immI0() ++pipe_class fp_dop_reg_reg_d(fRegD dst, fRegD src1, fRegD src2) +%{ -+ predicate(n->get_int() == 0); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// 32 bit unit increment -+operand immI_1() ++pipe_class fp_uop_s(fRegF dst, fRegF src) +%{ -+ predicate(n->get_int() == 1); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// 32 bit unit decrement -+operand immI_M1() ++pipe_class fp_uop_d(fRegD dst, fRegD src) +%{ -+ predicate(n->get_int() == -1); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Unsigned Integer Immediate: 6-bit int, greater than 32 -+operand uimmI6_ge32() %{ -+ predicate(((unsigned int)(n->get_int()) < 64) && (n->get_int() >= 32)); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++pipe_class fp_d2f(fRegF dst, fRegD src) ++%{ ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+operand immI_le_4() ++pipe_class fp_f2d(fRegD dst, fRegF src) +%{ -+ predicate(n->get_int() <= 4); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+operand immI_16() ++pipe_class fp_f2i(iRegINoSp dst, fRegF src) +%{ -+ predicate(n->get_int() == 16); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+operand immI_24() ++pipe_class fp_f2l(iRegLNoSp dst, fRegF src) +%{ -+ predicate(n->get_int() == 24); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+operand immI_31() ++pipe_class fp_i2f(fRegF dst, iRegIorL2I src) +%{ -+ predicate(n->get_int() == 31); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+operand immI_63() ++pipe_class fp_l2f(fRegF dst, iRegL src) +%{ -+ predicate(n->get_int() == 63); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// 32 bit integer valid for add immediate -+operand immIAdd() ++pipe_class fp_d2i(iRegINoSp dst, fRegD src) +%{ -+ predicate(Assembler::operand_valid_for_add_immediate((int64_t)n->get_int())); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// 32 bit integer valid for sub immediate -+operand immISub() ++pipe_class fp_d2l(iRegLNoSp dst, fRegD src) +%{ -+ predicate(Assembler::operand_valid_for_add_immediate(-(int64_t)n->get_int())); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); 
++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// 5 bit signed value. -+operand immI5() ++pipe_class fp_i2d(fRegD dst, iRegIorL2I src) +%{ -+ predicate(n->get_int() <= 15 && n->get_int() >= -16); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// 5 bit signed value (simm5) -+operand immL5() ++pipe_class fp_l2d(fRegD dst, iRegIorL2I src) +%{ -+ predicate(n->get_long() <= 15 && n->get_long() >= -16); -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Integer operands 64 bit -+// 64 bit immediate -+operand immL() ++pipe_class fp_div_s(fRegF dst, fRegF src1, fRegF src2) +%{ -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// 64 bit zero -+operand immL0() ++pipe_class fp_div_d(fRegD dst, fRegD src1, fRegD src2) +%{ -+ predicate(n->get_long() == 0); -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Pointer operands -+// Pointer Immediate -+operand immP() ++pipe_class fp_sqrt_s(fRegF dst, fRegF src1, fRegF src2) +%{ -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// NULL Pointer Immediate -+operand immP0() ++pipe_class fp_sqrt_d(fRegD dst, fRegD src1, fRegD src2) +%{ -+ predicate(n->get_ptr() == 0); -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Pointer Immediate One -+// this is used in object initialization (initial object header) -+operand immP_1() ++pipe_class fp_load_constant_s(fRegF dst) +%{ -+ predicate(n->get_ptr() == 1); -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Card Table Byte Map Base -+operand immByteMapBase() ++pipe_class fp_load_constant_d(fRegD dst) +%{ -+ // Get base of card map -+ predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && -+ (CardTable::CardValue*)n->get_ptr() == -+ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Int Immediate: low 16-bit mask -+operand immI_16bits() ++pipe_class fp_load_mem_s(fRegF dst, memory mem) +%{ -+ predicate(n->get_int() == 0xFFFF); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ mem : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// Long Immediate: low 32-bit mask -+operand immL_32bits() ++pipe_class fp_load_mem_d(fRegD dst, memory mem) +%{ -+ predicate(n->get_long() == 0xFFFFFFFFL); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ mem : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ LDST : 
MEM; +%} + -+// 64 bit unit decrement -+operand immL_M1() ++pipe_class fp_store_reg_s(fRegF src, memory mem) +%{ -+ predicate(n->get_long() == -1); -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ mem : S5(write); ++ DECODE : ID; ++ LDST : MEM; +%} + -+ -+// 32 bit offset of pc in thread anchor -+ -+operand immL_pc_off() ++pipe_class fp_store_reg_d(fRegD src, memory mem) +%{ -+ predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) + -+ in_bytes(JavaFrameAnchor::last_Java_pc_offset())); -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ src : S1(read); ++ mem : S5(write); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// 64 bit integer valid for add immediate -+operand immLAdd() ++//------- Integer ALU operations -------------------------- ++ ++// Integer ALU reg-reg operation ++// Operands needs in ID, result generated in EX ++// E.g. ADD Rd, Rs1, Rs2 ++pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ -+ predicate(Assembler::operand_valid_for_add_immediate(n->get_long())); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ ALU : EX; +%} + -+// 64 bit integer valid for sub immediate -+operand immLSub() ++// Integer ALU reg operation with constant shift ++// E.g. SLLI Rd, Rs1, #shift ++pipe_class ialu_reg_shift(iRegI dst, iRegI src1) +%{ -+ predicate(Assembler::operand_valid_for_add_immediate(-(n->get_long()))); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ DECODE : ID; ++ ALU : EX; +%} + -+// Narrow pointer operands -+// Narrow Pointer Immediate -+operand immN() ++// Integer ALU reg-reg operation with variable shift ++// both operands must be available in ID ++// E.g. SLL Rd, Rs1, Rs2 ++pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2) +%{ -+ match(ConN); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ ALU : EX; +%} + -+// Narrow NULL Pointer Immediate -+operand immN0() ++// Integer ALU reg operation ++// E.g. NEG Rd, Rs2 ++pipe_class ialu_reg(iRegI dst, iRegI src) +%{ -+ predicate(n->get_narrowcon() == 0); -+ match(ConN); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : EX(write); ++ src : ID(read); ++ DECODE : ID; ++ ALU : EX; +%} + -+operand immNKlass() ++// Integer ALU reg immediate operation ++// E.g. ADDI Rd, Rs1, #imm ++pipe_class ialu_reg_imm(iRegI dst, iRegI src1) +%{ -+ match(ConNKlass); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ DECODE : ID; ++ ALU : EX; +%} + -+// Float and Double operands -+// Double Immediate -+operand immD() ++// Integer ALU immediate operation (no source operands) ++// E.g. LI Rd, #imm ++pipe_class ialu_imm(iRegI dst) +%{ -+ match(ConD); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : EX(write); ++ DECODE : ID; ++ ALU : EX; +%} + -+// Double Immediate: +0.0d -+operand immD0() -+%{ -+ predicate(jlong_cast(n->getd()) == 0); -+ match(ConD); ++//------- Multiply pipeline operations -------------------- + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++// Multiply reg-reg ++// E.g. 
MULW Rd, Rs1, Rs2 ++pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ MUL : WR; +%} + -+// Float Immediate -+operand immF() ++// E.g. MUL RD, Rs1, Rs2 ++pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ -+ match(ConF); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ fixed_latency(3); // Maximum latency for 64 bit mul ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ MUL : WR; +%} + -+// Float Immediate: +0.0f. -+operand immF0() -+%{ -+ predicate(jint_cast(n->getf()) == 0); -+ match(ConF); ++//------- Divide pipeline operations -------------------- + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++// E.g. DIVW Rd, Rs1, Rs2 ++pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ fixed_latency(8); // Maximum latency for 32 bit divide ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ DIV : WR; +%} + -+operand immIOffset() ++// E.g. DIV RD, Rs1, Rs2 ++pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ -+ predicate(is_imm_in_range(n->get_int(), 12, 0)); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ fixed_latency(16); // Maximum latency for 64 bit divide ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ DIV : WR; +%} + -+operand immLOffset() ++//------- Load pipeline operations ------------------------ ++ ++// Load - reg, mem ++// E.g. LA Rd, mem ++pipe_class iload_reg_mem(iRegI dst, memory mem) +%{ -+ predicate(is_imm_in_range(n->get_long(), 12, 0)); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : WR(write); ++ mem : ID(read); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// Scale values -+operand immIScale() ++// Load - reg, reg ++// E.g. LD Rd, Rs ++pipe_class iload_reg_reg(iRegI dst, iRegI src) +%{ -+ predicate(1 <= n->get_int() && (n->get_int() <= 3)); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); ++ single_instruction; ++ dst : WR(write); ++ src : ID(read); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// Integer 32 bit Register Operands -+operand iRegI() ++//------- Control transfer pipeline operations ------------ ++ ++// Store - zr, mem ++// E.g. SD zr, mem ++pipe_class istore_mem(memory mem) +%{ -+ constraint(ALLOC_IN_RC(any_reg32)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ mem : ID(read); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// Integer 32 bit Register not Special -+operand iRegINoSp() ++// Store - reg, mem ++// E.g. SD Rs, mem ++pipe_class istore_reg_mem(iRegI src, memory mem) +%{ -+ constraint(ALLOC_IN_RC(no_special_reg32)); -+ match(RegI); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ mem : ID(read); ++ src : EX(read); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// Register R10 only -+operand iRegI_R10() -+%{ -+ constraint(ALLOC_IN_RC(int_r10_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Register R12 only -+operand iRegI_R12() ++// Store - reg, reg ++// E.g. 
SD Rs2, Rs1 ++pipe_class istore_reg_reg(iRegI dst, iRegI src) +%{ -+ constraint(ALLOC_IN_RC(int_r12_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ dst : ID(read); ++ src : EX(read); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// Register R13 only -+operand iRegI_R13() -+%{ -+ constraint(ALLOC_IN_RC(int_r13_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++//------- Store pipeline operations ----------------------- + -+// Register R14 only -+operand iRegI_R14() ++// Branch ++pipe_class pipe_branch() +%{ -+ constraint(ALLOC_IN_RC(int_r14_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ DECODE : ID; ++ BRANCH : EX; +%} + -+// Integer 64 bit Register Operands -+operand iRegL() ++// Branch ++pipe_class pipe_branch_reg(iRegI src) +%{ -+ constraint(ALLOC_IN_RC(any_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ src : ID(read); ++ DECODE : ID; ++ BRANCH : EX; +%} + -+// Integer 64 bit Register not Special -+operand iRegLNoSp() ++// Compare & Branch ++// E.g. BEQ Rs1, Rs2, L ++pipe_class pipe_cmp_branch(iRegI src1, iRegI src2) +%{ -+ constraint(ALLOC_IN_RC(no_special_reg)); -+ match(RegL); -+ match(iRegL_R10); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ BRANCH : EX; +%} + -+// Long 64 bit Register R28 only -+operand iRegL_R28() ++// E.g. BEQZ Rs, L ++pipe_class pipe_cmpz_branch(iRegI src) +%{ -+ constraint(ALLOC_IN_RC(r28_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ src : ID(read); ++ DECODE : ID; ++ BRANCH : EX; +%} + -+// Long 64 bit Register R29 only -+operand iRegL_R29() ++//------- Synchronisation operations ---------------------- ++// Any operation requiring serialization ++// E.g. FENCE/Atomic Ops/Load Acquire/Store Release ++pipe_class pipe_serial() +%{ -+ constraint(ALLOC_IN_RC(r29_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ force_serialization; ++ fixed_latency(16); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// Long 64 bit Register R30 only -+operand iRegL_R30() ++pipe_class pipe_slow() +%{ -+ constraint(ALLOC_IN_RC(r30_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ instruction_count(10); ++ multiple_bundles; ++ force_serialization; ++ fixed_latency(16); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// Pointer Register Operands -+// Pointer Register -+operand iRegP() ++// Empty pipeline class ++pipe_class pipe_class_empty() +%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ match(iRegP_R10); -+ match(javaThread_RegP); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ fixed_latency(0); +%} + -+// Pointer 64 bit Register not Special -+operand iRegPNoSp() ++// Default pipeline class. ++pipe_class pipe_class_default() +%{ -+ constraint(ALLOC_IN_RC(no_special_ptr_reg)); -+ match(RegP); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ fixed_latency(2); +%} + -+operand iRegP_R10() ++// Pipeline class for compares. 
++pipe_class pipe_class_compare() +%{ -+ constraint(ALLOC_IN_RC(r10_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ fixed_latency(16); +%} + -+// Pointer 64 bit Register R11 only -+operand iRegP_R11() ++// Pipeline class for memory operations. ++pipe_class pipe_class_memory() +%{ -+ constraint(ALLOC_IN_RC(r11_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ fixed_latency(16); +%} + -+operand iRegP_R12() ++// Pipeline class for call. ++pipe_class pipe_class_call() +%{ -+ constraint(ALLOC_IN_RC(r12_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ single_instruction; ++ fixed_latency(100); +%} + -+// Pointer 64 bit Register R13 only -+operand iRegP_R13() -+%{ -+ constraint(ALLOC_IN_RC(r13_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++// Define the class for the Nop node. ++define %{ ++ MachNop = pipe_class_empty; +%} -+ -+operand iRegP_R14() -+%{ -+ constraint(ALLOC_IN_RC(r14_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); +%} ++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// rrspectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. 
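Putting those pieces together, a minimal illustrative rule in the same style as the load rules that follow (not part of the patch; the name addI_sketch, the ALU_COST constant and the addw call are assumed here by analogy with LOAD_COST and the other C2_MacroAssembler calls in this file):

    // Illustrative only: integer add rule showing match/ins_cost/format/
    // ins_encode/ins_pipe working together.
    instruct addI_sketch(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2)
    %{
      match(Set dst (AddI src1 src2));   // ideal subtree this rule replaces
      ins_cost(ALU_COST);                // assumed cost constant, cf. LOAD_COST
      format %{ "addw  $dst, $src1, $src2\t# int, illustrative" %}
      ins_encode %{
        __ addw(as_Register($dst$$reg),
                as_Register($src1$$reg),
                as_Register($src2$$reg));
      %}
      ins_pipe(ialu_reg_reg);            // pipeline class defined above
    %}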
+ -+operand iRegP_R15() -+%{ -+ constraint(ALLOC_IN_RC(r15_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// ============================================================================ ++// Memory (Load/Store) Instructions + -+operand iRegP_R16() -+%{ -+ constraint(ALLOC_IN_RC(r16_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++// Load Instructions + -+// Pointer 64 bit Register R28 only -+operand iRegP_R28() ++// Load Byte (8 bit signed) ++instruct loadB(iRegINoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(r28_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ match(Set dst (LoadB mem)); + -+// Pointer Register Operands -+// Narrow Pointer Register -+operand iRegN() -+%{ -+ constraint(ALLOC_IN_RC(any_reg32)); -+ match(RegN); -+ match(iRegNNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ ins_cost(LOAD_COST); ++ format %{ "lb $dst, $mem\t# byte, #@loadB" %} + -+// Integer 64 bit Register not Special -+operand iRegNNoSp() -+%{ -+ constraint(ALLOC_IN_RC(no_special_reg32)); -+ match(RegN); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ ins_encode %{ ++ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+// heap base register -- used for encoding immN0 -+operand iRegIHeapbase() -+%{ -+ constraint(ALLOC_IN_RC(heapbase_reg)); -+ match(RegI); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ ins_pipe(iload_reg_mem); +%} + -+// Long 64 bit Register R10 only -+operand iRegL_R10() ++// Load Byte (8 bit signed) into long ++instruct loadB2L(iRegLNoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(r10_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ match(Set dst (ConvI2L (LoadB mem))); + -+// Float Register -+// Float register operands -+operand fRegF() -+%{ -+ constraint(ALLOC_IN_RC(float_reg)); -+ match(RegF); ++ ins_cost(LOAD_COST); ++ format %{ "lb $dst, $mem\t# byte, #@loadB2L" %} + -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ ins_encode %{ ++ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+// Double Register -+// Double register operands -+operand fRegD() ++// Load Byte (8 bit unsigned) ++instruct loadUB(iRegINoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(double_reg)); -+ match(RegD); ++ match(Set dst (LoadUB mem)); + -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ ins_cost(LOAD_COST); ++ format %{ "lbu $dst, $mem\t# byte, #@loadUB" %} + -+// Generic vector class. This will be used for -+// all vector operands. 
-+operand vReg() -+%{ -+ constraint(ALLOC_IN_RC(vectora_reg)); -+ match(VecA); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ ins_encode %{ ++ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+operand vReg_V1() -+%{ -+ constraint(ALLOC_IN_RC(v1_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ ins_pipe(iload_reg_mem); +%} + -+operand vReg_V2() ++// Load Byte (8 bit unsigned) into long ++instruct loadUB2L(iRegLNoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(v2_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ match(Set dst (ConvI2L (LoadUB mem))); + -+operand vReg_V3() -+%{ -+ constraint(ALLOC_IN_RC(v3_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ ins_cost(LOAD_COST); ++ format %{ "lbu $dst, $mem\t# byte, #@loadUB2L" %} + -+operand vReg_V4() -+%{ -+ constraint(ALLOC_IN_RC(v4_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ ins_encode %{ ++ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+operand vReg_V5() -+%{ -+ constraint(ALLOC_IN_RC(v5_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ ins_pipe(iload_reg_mem); +%} + -+// Java Thread Register -+operand javaThread_RegP(iRegP reg) ++// Load Short (16 bit signed) ++instruct loadS(iRegINoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(java_thread_reg)); // java_thread_reg -+ match(reg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lh $dst, $mem\t# short, #@loadS" %} ++ ++ ins_encode %{ ++ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+//----------Memory Operands---------------------------------------------------- -+// RISCV has only base_plus_offset and literal address mode, so no need to use -+// index and scale. Here set index as 0xffffffff and scale as 0x0. 
-+operand indirect(iRegP reg) ++// Load Short (16 bit signed) into long ++instruct loadS2L(iRegLNoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(reg); -+ op_cost(0); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp(0x0); ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lh $dst, $mem\t# short, #@loadS2L" %} ++ ++ ins_encode %{ ++ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+operand indOffI(iRegP reg, immIOffset off) ++// Load Char (16 bit unsigned) ++instruct loadUS(iRegINoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lhu $dst, $mem\t# short, #@loadUS" %} ++ ++ ins_encode %{ ++ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+operand indOffL(iRegP reg, immLOffset off) ++// Load Short/Char (16 bit unsigned) into long ++instruct loadUS2L(iRegLNoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lhu $dst, $mem\t# short, #@loadUS2L" %} ++ ++ ins_encode %{ ++ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+operand indirectN(iRegN reg) ++// Load Integer (32 bit signed) ++instruct loadI(iRegINoSp dst, memory mem) +%{ -+ predicate(CompressedOops::shift() == 0); -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(DecodeN reg); -+ op_cost(0); -+ format %{ "[$reg]\t# narrow" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp(0x0); ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lw $dst, $mem\t# int, #@loadI" %} ++ ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+operand indOffIN(iRegN reg, immIOffset off) ++// Load Integer (32 bit signed) into long ++instruct loadI2L(iRegLNoSp dst, memory mem) +%{ -+ predicate(CompressedOops::shift() == 0); -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP (DecodeN reg) off); -+ op_cost(0); -+ format %{ "[$reg, $off]\t# narrow" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lw $dst, $mem\t# int, #@loadI2L" %} ++ ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+operand indOffLN(iRegN reg, immLOffset off) ++// Load Integer (32 bit unsigned) into long ++instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) +%{ -+ predicate(CompressedOops::shift() == 0); -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP (DecodeN reg) off); -+ op_cost(0); -+ format %{ "[$reg, $off]\t# narrow" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); 
-+ disp($off); ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# int, #@loadUI2L" %} ++ ++ ins_encode %{ ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+// RISCV opto stubs need to write to the pc slot in the thread anchor -+operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off) ++// Load Long (64 bit signed) ++instruct loadL(iRegLNoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} ++ match(Set dst (LoadL mem)); + ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# int, #@loadL" %} + -+//----------Special Memory Operands-------------------------------------------- -+// Stack Slot Operand - This operand is used for loading and storing temporary -+// values on the stack where a match requires a value to -+// flow through memory. -+operand stackSlotI(sRegI reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegI); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} -+%} + -+operand stackSlotF(sRegF reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegF); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} ++ ins_pipe(iload_reg_mem); +%} + -+operand stackSlotD(sRegD reg) ++// Load Range ++instruct loadRange(iRegINoSp dst, memory mem) +%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegD); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} ++ match(Set dst (LoadRange mem)); + -+operand stackSlotL(sRegL reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegL); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# range, #@loadRange" %} + -+// Special operand allowing long args to int ops to be truncated for free ++ ins_encode %{ ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+operand iRegL2I(iRegL reg) %{ ++ ins_pipe(iload_reg_mem); ++%} + -+ op_cost(0); ++// Load Pointer ++instruct loadP(iRegPNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadP mem)); + -+ match(ConvL2I reg); ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# ptr, #@loadP" %} + -+ format %{ "l2i($reg)" %} ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+ interface(REG_INTER) ++ ins_pipe(iload_reg_mem); +%} + ++// Load Compressed Pointer 
++instruct loadN(iRegNNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); + -+// Comparison Operands -+// NOTE: Label is a predefined operand which should not be redefined in -+// the AD file. It is generically handled within the ADLC. ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# loadN, compressed ptr, #@loadN" %} + -+//----------Conditional Branch Operands---------------------------------------- -+// Comparison Op - This is the operation of the comparison, and is limited to -+// the following set of codes: -+// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) -+// -+// Other attributes of the comparison, such as unsignedness, are specified -+// by the comparison instruction that sets a condition code flags register. -+// That result is represented by a flags operand whose subtype is appropriate -+// to the unsignedness (etc.) of the comparison. -+// -+// Later, the instruction which matches both the Comparison Op (a Bool) and -+// the flags (produced by the Cmp) specifies the coding of the comparison op -+// by matching a specific subtype of Bool operand below, such as cmpOpU. ++ ins_encode %{ ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} + ++ ins_pipe(iload_reg_mem); ++%} + -+// used for signed integral comparisons and fp comparisons -+operand cmpOp() ++// Load Klass Pointer ++instruct loadKlass(iRegPNoSp dst, memory mem) +%{ -+ match(Bool); ++ match(Set dst (LoadKlass mem)); + -+ format %{ "" %} ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# class, #@loadKlass" %} + -+ // the values in interface derives from struct BoolTest::mask -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+// used for unsigned integral comparisons -+operand cmpOpU() ++// Load Narrow Klass Pointer ++instruct loadNKlass(iRegNNoSp dst, memory mem) +%{ -+ match(Bool); ++ match(Set dst (LoadNKlass mem)); + -+ format %{ "" %} -+ // the values in interface derives from struct BoolTest::mask -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gtu"); -+ overflow(0x2, "overflow"); -+ less(0x3, "ltu"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "leu"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "geu"); ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# loadNKlass, compressed class ptr, #@loadNKlass" %} ++ ++ ins_encode %{ ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(iload_reg_mem); +%} + -+// used for certain integral comparisons which can be -+// converted to bxx instructions -+operand cmpOpEqNe() ++// Load Float ++instruct loadF(fRegF dst, memory mem) +%{ -+ match(Bool); -+ op_cost(0); -+ predicate(n->as_Bool()->_test._test == BoolTest::ne || -+ n->as_Bool()->_test._test == BoolTest::eq); ++ match(Set dst (LoadF mem)); + -+ format %{ "" %} -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); ++ ins_cost(LOAD_COST); ++ format %{ "flw $dst, $mem\t# float, #@loadF" %} ++ ++ ins_encode %{ ++ __ 
flw(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(fp_load_mem_s); +%} + -+operand cmpOpULtGe() ++// Load Double ++instruct loadD(fRegD dst, memory mem) +%{ -+ match(Bool); -+ op_cost(0); -+ predicate(n->as_Bool()->_test._test == BoolTest::lt || -+ n->as_Bool()->_test._test == BoolTest::ge); ++ match(Set dst (LoadD mem)); + -+ format %{ "" %} -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); ++ ins_cost(LOAD_COST); ++ format %{ "fld $dst, $mem\t# double, #@loadD" %} ++ ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} ++ ++ ins_pipe(fp_load_mem_d); +%} + -+operand cmpOpUEqNeLeGt() ++// Load Int Constant ++instruct loadConI(iRegINoSp dst, immI src) +%{ -+ match(Bool); -+ op_cost(0); -+ predicate(n->as_Bool()->_test._test == BoolTest::ne || -+ n->as_Bool()->_test._test == BoolTest::eq || -+ n->as_Bool()->_test._test == BoolTest::le || -+ n->as_Bool()->_test._test == BoolTest::gt); -+ -+ format %{ "" %} -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); -+ %} -+%} ++ match(Set dst src); + ++ ins_cost(ALU_COST); ++ format %{ "li $dst, $src\t# int, #@loadConI" %} + -+// Flags register, used as output of compare logic -+operand rFlagsReg() -+%{ -+ constraint(ALLOC_IN_RC(reg_flags)); -+ match(RegFlags); ++ ins_encode(riscv_enc_li_imm(dst, src)); + -+ op_cost(0); -+ format %{ "RFLAGS" %} -+ interface(REG_INTER); ++ ins_pipe(ialu_imm); +%} + -+// Special Registers -+ -+// Method Register -+operand inline_cache_RegP(iRegP reg) ++// Load Long Constant ++instruct loadConL(iRegLNoSp dst, immL src) +%{ -+ constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg -+ match(reg); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ match(Set dst src); + -+//----------OPERAND CLASSES---------------------------------------------------- -+// Operand Classes are groups of operands that are used as to simplify -+// instruction definitions by not requiring the AD writer to specify -+// separate instructions for every form of operand when the -+// instruction accepts multiple operand types with the same basic -+// encoding and format. The classic case of this is memory operands. ++ ins_cost(ALU_COST); ++ format %{ "li $dst, $src\t# long, #@loadConL" %} + -+// memory is used to define read/write location for load/store -+// instruction defs. we can turn a memory op into an Address ++ ins_encode(riscv_enc_li_imm(dst, src)); + -+opclass memory(indirect, indOffI, indOffL, indirectN, indOffIN, indOffLN); ++ ins_pipe(ialu_imm); ++%} + -+// iRegIorL2I is used for src inputs in rules for 32 bit int (I) -+// operations. it allows the src to be either an iRegI or a (ConvL2I -+// iRegL). in the latter case the l2i normally planted for a ConvL2I -+// can be elided because the 32-bit instruction will just employ the -+// lower 32 bits anyway. -+// -+// n.b. this does not elide all L2I conversions. 
if the truncated -+// value is consumed by more than one operation then the ConvL2I -+// cannot be bundled into the consuming nodes so an l2i gets planted -+// (actually a mvw $dst $src) and the downstream instructions consume -+// the result of the l2i as an iRegI input. That's a shame since the -+// mvw is actually redundant but its not too costly. ++// Load Pointer Constant ++instruct loadConP(iRegPNoSp dst, immP con) ++%{ ++ match(Set dst con); + -+opclass iRegIorL2I(iRegI, iRegL2I); -+opclass iRegIorL(iRegI, iRegL); -+opclass iRegNorP(iRegN, iRegP); -+opclass iRegILNP(iRegI, iRegL, iRegN, iRegP); -+opclass iRegILNPNoSp(iRegINoSp, iRegLNoSp, iRegNNoSp, iRegPNoSp); -+opclass immIorL(immI, immL); ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# ptr, #@loadConP" %} + -+//----------PIPELINE----------------------------------------------------------- -+// Rules which define the behavior of the target architectures pipeline. ++ ins_encode(riscv_enc_mov_p(dst, con)); + -+// For specific pipelines, e.g. generic RISC-V, define the stages of that pipeline -+//pipe_desc(ID, EX, MEM, WR); -+#define ID S0 -+#define EX S1 -+#define MEM S2 -+#define WR S3 ++ ins_pipe(ialu_imm); ++%} + -+// Integer ALU reg operation -+pipeline %{ ++// Load Null Pointer Constant ++instruct loadConP0(iRegPNoSp dst, immP0 con) ++%{ ++ match(Set dst con); + -+attributes %{ -+ // RISC-V instructions are of fixed length -+ fixed_size_instructions; // Fixed size instructions TODO does -+ max_instructions_per_bundle = 2; // Generic RISC-V 1, Sifive Series 7 2 -+ // RISC-V instructions come in 32-bit word units -+ instruction_unit_size = 4; // An instruction is 4 bytes long -+ instruction_fetch_unit_size = 64; // The processor fetches one line -+ instruction_fetch_units = 1; // of 64 bytes ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# NULL ptr, #@loadConP0" %} + -+ // List of nop instructions -+ nops( MachNop ); ++ ins_encode(riscv_enc_mov_zero(dst)); ++ ++ ins_pipe(ialu_imm); +%} + -+// We don't use an actual pipeline model so don't care about resources -+// or description. we do use pipeline classes to introduce fixed -+// latencies ++// Load Pointer Constant One ++instruct loadConP1(iRegPNoSp dst, immP_1 con) ++%{ ++ match(Set dst con); + -+//----------RESOURCES---------------------------------------------------------- -+// Resources are the functional units available to the machine ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# load ptr constant one, #@loadConP1" %} + -+// Generic RISC-V pipeline -+// 1 decoder -+// 1 instruction decoded per cycle -+// 1 load/store ops per cycle, 1 branch, 1 FPU -+// 1 mul, 1 div ++ ins_encode(riscv_enc_mov_p1(dst)); + -+resources ( DECODE, -+ ALU, -+ MUL, -+ DIV, -+ BRANCH, -+ LDST, -+ FPU); ++ ins_pipe(ialu_imm); ++%} + -+//----------PIPELINE DESCRIPTION----------------------------------------------- -+// Pipeline Description specifies the stages in the machine's pipeline ++// Load Poll Page Constant ++instruct loadConPollPage(iRegPNoSp dst, immPollPage con) ++%{ ++ match(Set dst con); + -+// Define the pipeline as a generic 6 stage pipeline -+pipe_desc(S0, S1, S2, S3, S4, S5); ++ ins_cost(ALU_COST * 6); ++ format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} + -+//----------PIPELINE CLASSES--------------------------------------------------- -+// Pipeline Classes describe the stages in which input and output are -+// referenced by the hardware pipeline. 
++ ins_encode(riscv_enc_mov_poll_page(dst, con)); + -+pipe_class fp_dop_reg_reg_s(fRegF dst, fRegF src1, fRegF src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ ins_pipe(ialu_imm); +%} + -+pipe_class fp_dop_reg_reg_d(fRegD dst, fRegD src1, fRegD src2) ++// Load Byte Map Base Constant ++instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) +%{ -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ match(Set dst con); ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# Byte Map Base, #@loadByteMapBase" %} + -+pipe_class fp_uop_s(fRegF dst, fRegF src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ ins_encode(riscv_enc_mov_byte_map_base(dst)); + -+pipe_class fp_uop_d(fRegD dst, fRegD src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ ins_pipe(ialu_imm); +%} + -+pipe_class fp_d2f(fRegF dst, fRegD src) ++// Load Narrow Pointer Constant ++instruct loadConN(iRegNNoSp dst, immN con) +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ match(Set dst con); + -+pipe_class fp_f2d(fRegD dst, fRegF src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ ins_cost(ALU_COST * 4); ++ format %{ "mv $dst, $con\t# compressed ptr, #@loadConN" %} + -+pipe_class fp_f2i(iRegINoSp dst, fRegF src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ ins_encode(riscv_enc_mov_n(dst, con)); + -+pipe_class fp_f2l(iRegLNoSp dst, fRegF src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ ins_pipe(ialu_imm); +%} + -+pipe_class fp_i2f(fRegF dst, iRegIorL2I src) ++// Load Narrow Null Pointer Constant ++instruct loadConN0(iRegNNoSp dst, immN0 con) +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ match(Set dst con); ++ ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# compressed NULL ptr, #@loadConN0" %} ++ ++ ins_encode(riscv_enc_mov_zero(dst)); ++ ++ ins_pipe(ialu_imm); +%} + -+pipe_class fp_l2f(fRegF dst, iRegL src) ++// Load Narrow Klass Constant ++instruct loadConNKlass(iRegNNoSp dst, immNKlass con) +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ match(Set dst con); ++ ++ ins_cost(ALU_COST * 6); ++ format %{ "mv $dst, $con\t# compressed klass ptr, #@loadConNKlass" %} ++ ++ ins_encode(riscv_enc_mov_nk(dst, con)); ++ ++ ins_pipe(ialu_imm); +%} + -+pipe_class fp_d2i(iRegINoSp dst, fRegD src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} -+ -+pipe_class fp_d2l(iRegLNoSp dst, fRegD src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++// Load Float Constant ++instruct loadConF(fRegF dst, immF con) %{ ++ match(Set dst con); + -+pipe_class fp_i2d(fRegD dst, iRegIorL2I src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ ins_cost(LOAD_COST); ++ format %{ ++ "flw $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConF" ++ %} + -+pipe_class fp_l2d(fRegD dst, iRegIorL2I src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ ins_encode %{ ++ __ 
flw(as_FloatRegister($dst$$reg), $constantaddress($con)); ++ %} + -+pipe_class fp_div_s(fRegF dst, fRegF src1, fRegF src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ ins_pipe(fp_load_constant_s); +%} + -+pipe_class fp_div_d(fRegD dst, fRegD src1, fRegD src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++instruct loadConF0(fRegF dst, immF0 con) %{ ++ match(Set dst con); + -+pipe_class fp_sqrt_s(fRegF dst, fRegF src1, fRegF src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ ins_cost(XFER_COST); + -+pipe_class fp_sqrt_d(fRegD dst, fRegD src1, fRegD src2) -+%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ format %{ "fmv.w.x $dst, zr\t# float, #@loadConF0" %} + -+pipe_class fp_load_constant_s(fRegF dst) -+%{ -+ single_instruction; -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; -+%} ++ ins_encode %{ ++ __ fmv_w_x(as_FloatRegister($dst$$reg), zr); ++ %} + -+pipe_class fp_load_constant_d(fRegD dst) -+%{ -+ single_instruction; -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ ins_pipe(fp_load_constant_s); +%} + -+pipe_class fp_load_mem_s(fRegF dst, memory mem) -+%{ -+ single_instruction; -+ mem : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ LDST : MEM; -+%} ++// Load Double Constant ++instruct loadConD(fRegD dst, immD con) %{ ++ match(Set dst con); + -+pipe_class fp_load_mem_d(fRegD dst, memory mem) -+%{ -+ single_instruction; -+ mem : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ LDST : MEM; -+%} ++ ins_cost(LOAD_COST); ++ format %{ ++ "fld $dst, [$constantaddress]\t# load from constant table: double=$con, #@loadConD" ++ %} + -+pipe_class fp_store_reg_s(fRegF src, memory mem) -+%{ -+ single_instruction; -+ src : S1(read); -+ mem : S5(write); -+ DECODE : ID; -+ LDST : MEM; -+%} ++ ins_encode %{ ++ __ fld(as_FloatRegister($dst$$reg), $constantaddress($con)); ++ %} + -+pipe_class fp_store_reg_d(fRegD src, memory mem) -+%{ -+ single_instruction; -+ src : S1(read); -+ mem : S5(write); -+ DECODE : ID; -+ LDST : MEM; ++ ins_pipe(fp_load_constant_d); +%} + -+//------- Integer ALU operations -------------------------- -+ -+// Integer ALU reg-reg operation -+// Operands needs in ID, result generated in EX -+// E.g. ADD Rd, Rs1, Rs2 -+pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ ALU : EX; -+%} ++instruct loadConD0(fRegD dst, immD0 con) %{ ++ match(Set dst con); + -+// Integer ALU reg operation with constant shift -+// E.g. SLLI Rd, Rs1, #shift -+pipe_class ialu_reg_shift(iRegI dst, iRegI src1) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ DECODE : ID; -+ ALU : EX; -+%} ++ ins_cost(XFER_COST); + -+// Integer ALU reg-reg operation with variable shift -+// both operands must be available in ID -+// E.g. SLL Rd, Rs1, Rs2 -+pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ ALU : EX; -+%} ++ format %{ "fmv.d.x $dst, zr\t# double, #@loadConD0" %} + -+// Integer ALU reg operation -+// E.g. 
NEG Rd, Rs2 -+pipe_class ialu_reg(iRegI dst, iRegI src) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src : ID(read); -+ DECODE : ID; -+ ALU : EX; -+%} ++ ins_encode %{ ++ __ fmv_d_x(as_FloatRegister($dst$$reg), zr); ++ %} + -+// Integer ALU reg immediate operation -+// E.g. ADDI Rd, Rs1, #imm -+pipe_class ialu_reg_imm(iRegI dst, iRegI src1) -+%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ DECODE : ID; -+ ALU : EX; ++ ins_pipe(fp_load_constant_d); +%} + -+// Integer ALU immediate operation (no source operands) -+// E.g. LI Rd, #imm -+pipe_class ialu_imm(iRegI dst) ++// Store Instructions ++// Store CMS card-mark Immediate ++instruct storeimmCM0(immI0 zero, memory mem) +%{ -+ single_instruction; -+ dst : EX(write); -+ DECODE : ID; -+ ALU : EX; -+%} ++ match(Set mem (StoreCM mem zero)); ++ predicate(unnecessary_storestore(n)); + -+//------- Multiply pipeline operations -------------------- ++ ins_cost(STORE_COST); ++ format %{ "storestore (elided)\n\t" ++ "sb zr, $mem\t# byte, #@storeimmCM0" %} + -+// Multiply reg-reg -+// E.g. MULW Rd, Rs1, Rs2 -+pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ MUL : WR; -+%} ++ ins_encode %{ ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+// E.g. MUL RD, Rs1, Rs2 -+pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ fixed_latency(3); // Maximum latency for 64 bit mul -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ MUL : WR; ++ ins_pipe(istore_mem); +%} + -+//------- Divide pipeline operations -------------------- -+ -+// E.g. DIVW Rd, Rs1, Rs2 -+pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++// Store CMS card-mark Immediate with intervening StoreStore ++// needed when using CMS with no conditional card marking ++instruct storeimmCM0_ordered(immI0 zero, memory mem) +%{ -+ single_instruction; -+ fixed_latency(8); // Maximum latency for 32 bit divide -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ DIV : WR; -+%} ++ match(Set mem (StoreCM mem zero)); + -+// E.g. DIV RD, Rs1, Rs2 -+pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) -+%{ -+ single_instruction; -+ fixed_latency(16); // Maximum latency for 64 bit divide -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ DIV : WR; -+%} ++ ins_cost(ALU_COST + STORE_COST); ++ format %{ "membar(StoreStore)\n\t" ++ "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %} + -+//------- Load pipeline operations ------------------------ ++ ins_encode %{ ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+// Load - reg, mem -+// E.g. LA Rd, mem -+pipe_class iload_reg_mem(iRegI dst, memory mem) -+%{ -+ single_instruction; -+ dst : WR(write); -+ mem : ID(read); -+ DECODE : ID; -+ LDST : MEM; ++ ins_pipe(istore_mem); +%} + -+// Load - reg, reg -+// E.g. LD Rd, Rs -+pipe_class iload_reg_reg(iRegI dst, iRegI src) ++// Store Byte ++instruct storeB(iRegIorL2I src, memory mem) +%{ -+ single_instruction; -+ dst : WR(write); -+ src : ID(read); -+ DECODE : ID; -+ LDST : MEM; -+%} ++ match(Set mem (StoreB mem src)); + -+//------- Store pipeline operations ----------------------- ++ ins_cost(STORE_COST); ++ format %{ "sb $src, $mem\t# byte, #@storeB" %} + -+// Store - zr, mem -+// E.g. 
SD zr, mem -+pipe_class istore_mem(memory mem) -+%{ -+ single_instruction; -+ mem : ID(read); -+ DECODE : ID; -+ LDST : MEM; -+%} ++ ins_encode %{ ++ __ sb(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+// Store - reg, mem -+// E.g. SD Rs, mem -+pipe_class istore_reg_mem(iRegI src, memory mem) -+%{ -+ single_instruction; -+ mem : ID(read); -+ src : EX(read); -+ DECODE : ID; -+ LDST : MEM; ++ ins_pipe(istore_reg_mem); +%} + -+// Store - reg, reg -+// E.g. SD Rs2, Rs1 -+pipe_class istore_reg_reg(iRegI dst, iRegI src) ++instruct storeimmB0(immI0 zero, memory mem) +%{ -+ single_instruction; -+ dst : ID(read); -+ src : EX(read); -+ DECODE : ID; -+ LDST : MEM; -+%} ++ match(Set mem (StoreB mem zero)); + -+//------- Store pipeline operations ----------------------- ++ ins_cost(STORE_COST); ++ format %{ "sb zr, $mem\t# byte, #@storeimmB0" %} + -+// Branch -+pipe_class pipe_branch() -+%{ -+ single_instruction; -+ DECODE : ID; -+ BRANCH : EX; -+%} ++ ins_encode %{ ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+// Branch -+pipe_class pipe_branch_reg(iRegI src) -+%{ -+ single_instruction; -+ src : ID(read); -+ DECODE : ID; -+ BRANCH : EX; ++ ins_pipe(istore_mem); +%} + -+// Compare & Branch -+// E.g. BEQ Rs1, Rs2, L -+pipe_class pipe_cmp_branch(iRegI src1, iRegI src2) ++// Store Char/Short ++instruct storeC(iRegIorL2I src, memory mem) +%{ -+ single_instruction; -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ BRANCH : EX; -+%} ++ match(Set mem (StoreC mem src)); + -+// E.g. BEQZ Rs, L -+pipe_class pipe_cmpz_branch(iRegI src) -+%{ -+ single_instruction; -+ src : ID(read); -+ DECODE : ID; -+ BRANCH : EX; -+%} ++ ins_cost(STORE_COST); ++ format %{ "sh $src, $mem\t# short, #@storeC" %} + -+//------- Synchronisation operations ---------------------- -+// Any operation requiring serialization -+// E.g. FENCE/Atomic Ops/Load Acquire/Store Release -+pipe_class pipe_serial() -+%{ -+ single_instruction; -+ force_serialization; -+ fixed_latency(16); -+ DECODE : ID; -+ LDST : MEM; -+%} ++ ins_encode %{ ++ __ sh(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+pipe_class pipe_slow() -+%{ -+ instruction_count(10); -+ multiple_bundles; -+ force_serialization; -+ fixed_latency(16); -+ DECODE : ID; -+ LDST : MEM; ++ ins_pipe(istore_reg_mem); +%} + -+// Empty pipeline class -+pipe_class pipe_class_empty() ++instruct storeimmC0(immI0 zero, memory mem) +%{ -+ single_instruction; -+ fixed_latency(0); -+%} ++ match(Set mem (StoreC mem zero)); + -+// Default pipeline class. -+pipe_class pipe_class_default() -+%{ -+ single_instruction; -+ fixed_latency(2); -+%} ++ ins_cost(STORE_COST); ++ format %{ "sh zr, $mem\t# short, #@storeimmC0" %} + -+// Pipeline class for compares. -+pipe_class pipe_class_compare() -+%{ -+ single_instruction; -+ fixed_latency(16); -+%} ++ ins_encode %{ ++ __ sh(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+// Pipeline class for memory operations. -+pipe_class pipe_class_memory() -+%{ -+ single_instruction; -+ fixed_latency(16); ++ ins_pipe(istore_mem); +%} + -+// Pipeline class for call. -+pipe_class pipe_class_call() ++// Store Integer ++instruct storeI(iRegIorL2I src, memory mem) +%{ -+ single_instruction; -+ fixed_latency(100); -+%} ++ match(Set mem(StoreI mem src)); + -+// Define the class for the Nop node. 
-+define %{ -+ MachNop = pipe_class_empty; -+%} -+%} -+//----------INSTRUCTIONS------------------------------------------------------- -+// -+// match -- States which machine-independent subtree may be replaced -+// by this instruction. -+// ins_cost -- The estimated cost of this instruction is used by instruction -+// selection to identify a minimum cost tree of machine -+// instructions that matches a tree of machine-independent -+// instructions. -+// format -- A string providing the disassembly for this instruction. -+// The value of an instruction's operand may be inserted -+// by referring to it with a '$' prefix. -+// opcode -- Three instruction opcodes may be provided. These are referred -+// to within an encode class as $primary, $secondary, and $tertiary -+// rrspectively. The primary opcode is commonly used to -+// indicate the type of machine instruction, while secondary -+// and tertiary are often used for prefix options or addressing -+// modes. -+// ins_encode -- A list of encode classes with parameters. The encode class -+// name must have been defined in an 'enc_class' specification -+// in the encode section of the architecture description. ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# int, #@storeI" %} + -+// ============================================================================ -+// Memory (Load/Store) Instructions ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} + -+// Load Instructions ++ ins_pipe(istore_reg_mem); ++%} + -+// Load Byte (8 bit signed) -+instruct loadB(iRegINoSp dst, memory mem) ++instruct storeimmI0(immI0 zero, memory mem) +%{ -+ match(Set dst (LoadB mem)); ++ match(Set mem(StoreI mem zero)); + -+ ins_cost(LOAD_COST); -+ format %{ "lb $dst, $mem\t# byte, #@loadB" %} ++ ins_cost(STORE_COST); ++ format %{ "sw zr, $mem\t# int, #@storeimmI0" %} + + ins_encode %{ -+ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ sw(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(istore_mem); +%} + -+// Load Byte (8 bit signed) into long -+instruct loadB2L(iRegLNoSp dst, memory mem) ++// Store Long (64 bit signed) ++instruct storeL(iRegL src, memory mem) +%{ -+ match(Set dst (ConvI2L (LoadB mem))); ++ match(Set mem (StoreL mem src)); + -+ ins_cost(LOAD_COST); -+ format %{ "lb $dst, $mem\t# byte, #@loadB2L" %} ++ ins_cost(STORE_COST); ++ format %{ "sd $src, $mem\t# long, #@storeL" %} + + ins_encode %{ -+ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(istore_reg_mem); +%} + -+// Load Byte (8 bit unsigned) -+instruct loadUB(iRegINoSp dst, memory mem) ++// Store Long (64 bit signed) ++instruct storeimmL0(immL0 zero, memory mem) +%{ -+ match(Set dst (LoadUB mem)); ++ match(Set mem (StoreL mem zero)); + -+ ins_cost(LOAD_COST); -+ format %{ "lbu $dst, $mem\t# byte, #@loadUB" %} ++ ins_cost(STORE_COST); ++ format %{ "sd zr, $mem\t# long, #@storeimmL0" %} + + ins_encode %{ -+ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(istore_mem); +%} + -+// Load Byte (8 bit unsigned) into long -+instruct loadUB2L(iRegLNoSp dst, memory mem) ++// Store Pointer 
++instruct storeP(iRegP src, memory mem) +%{ -+ match(Set dst (ConvI2L (LoadUB mem))); ++ match(Set mem (StoreP mem src)); + -+ ins_cost(LOAD_COST); -+ format %{ "lbu $dst, $mem\t# byte, #@loadUB2L" %} ++ ins_cost(STORE_COST); ++ format %{ "sd $src, $mem\t# ptr, #@storeP" %} + + ins_encode %{ -+ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(istore_reg_mem); +%} + -+// Load Short (16 bit signed) -+instruct loadS(iRegINoSp dst, memory mem) ++// Store Pointer ++instruct storeimmP0(immP0 zero, memory mem) +%{ -+ match(Set dst (LoadS mem)); ++ match(Set mem (StoreP mem zero)); + -+ ins_cost(LOAD_COST); -+ format %{ "lh $dst, $mem\t# short, #@loadS" %} ++ ins_cost(STORE_COST); ++ format %{ "sd zr, $mem\t# ptr, #@storeimmP0" %} + + ins_encode %{ -+ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(istore_mem); +%} + -+// Load Short (16 bit signed) into long -+instruct loadS2L(iRegLNoSp dst, memory mem) ++// Store Compressed Pointer ++instruct storeN(iRegN src, memory mem) +%{ -+ match(Set dst (ConvI2L (LoadS mem))); ++ match(Set mem (StoreN mem src)); + -+ ins_cost(LOAD_COST); -+ format %{ "lh $dst, $mem\t# short, #@loadS2L" %} ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %} + + ins_encode %{ -+ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(istore_reg_mem); +%} + -+// Load Char (16 bit unsigned) -+instruct loadUS(iRegINoSp dst, memory mem) ++instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) +%{ -+ match(Set dst (LoadUS mem)); ++ match(Set mem (StoreN mem zero)); ++ predicate(Universe::narrow_oop_base() == NULL && ++ Universe::narrow_klass_base() == NULL); + -+ ins_cost(LOAD_COST); -+ format %{ "lhu $dst, $mem\t# short, #@loadUS" %} ++ ins_cost(STORE_COST); ++ format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} + + ins_encode %{ -+ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ sw(as_Register($heapbase$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(istore_reg_mem); +%} + -+// Load Short/Char (16 bit unsigned) into long -+instruct loadUS2L(iRegLNoSp dst, memory mem) ++// Store Float ++instruct storeF(fRegF src, memory mem) +%{ -+ match(Set dst (ConvI2L (LoadUS mem))); ++ match(Set mem (StoreF mem src)); + -+ ins_cost(LOAD_COST); -+ format %{ "lhu $dst, $mem\t# short, #@loadUS2L" %} ++ ins_cost(STORE_COST); ++ format %{ "fsw $src, $mem\t# float, #@storeF" %} + + ins_encode %{ -+ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ fsw(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(fp_store_reg_s); +%} + -+// Load Integer (32 bit signed) -+instruct loadI(iRegINoSp dst, memory mem) ++// Store Double ++instruct storeD(fRegD src, memory mem) +%{ -+ match(Set dst (LoadI mem)); ++ match(Set mem (StoreD mem src)); + -+ ins_cost(LOAD_COST); -+ format %{ "lw $dst, $mem\t# int, #@loadI" %} ++ 
ins_cost(STORE_COST); ++ format %{ "fsd $src, $mem\t# double, #@storeD" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); -+ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(fp_store_reg_d); +%} + -+// Load Integer (32 bit signed) into long -+instruct loadI2L(iRegLNoSp dst, memory mem) ++// Store Compressed Klass Pointer ++instruct storeNKlass(iRegN src, memory mem) +%{ -+ match(Set dst (ConvI2L (LoadI mem))); ++ match(Set mem (StoreNKlass mem src)); + -+ ins_cost(LOAD_COST); -+ format %{ "lw $dst, $mem\t# int, #@loadI2L" %} ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %} + + ins_encode %{ + Assembler::CompressibleRegion cr(&_masm); -+ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(istore_reg_mem); +%} + -+// Load Integer (32 bit unsigned) into long -+instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) ++// ============================================================================ ++// Atomic operation instructions ++// ++// Intel and SPARC both implement Ideal Node LoadPLocked and ++// Store{PIL}Conditional instructions using a normal load for the ++// LoadPLocked and a CAS for the Store{PIL}Conditional. ++// ++// The ideal code appears only to use LoadPLocked/storePConditional as a ++// pair to lock object allocations from Eden space when not using ++// TLABs. ++// ++// There does not appear to be a Load{IL}Locked Ideal Node and the ++// Ideal code appears to use Store{IL}Conditional as an alias for CAS ++// and to use StoreIConditional only for 32-bit and StoreLConditional ++// only for 64-bit. ++// ++// We implement LoadPLocked and storePConditional instructions using, ++// respectively the RISCV hw load-reserve and store-conditional ++// instructions. Whereas we must implement each of ++// Store{IL}Conditional using a CAS which employs a pair of ++// instructions comprising a load-reserve followed by a ++// store-conditional. ++ ++ ++// Locked-load (load reserved) of the current heap-top ++// used when updating the eden heap top ++// implemented using lr_d on RISCV64 ++instruct loadPLocked(iRegPNoSp dst, indirect mem) +%{ -+ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ match(Set dst (LoadPLocked mem)); + -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# int, #@loadUI2L" %} ++ ins_cost(ALU_COST * 2 + LOAD_COST); ++ ++ format %{ "lr.d $dst, $mem\t# ptr load reserved, #@loadPLocked" %} + + ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ la(t0, Address(as_Register($mem$$base), $mem$$disp)); ++ __ lr_d($dst$$Register, t0, Assembler::aq); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(pipe_serial); +%} + -+// Load Long (64 bit signed) -+instruct loadL(iRegLNoSp dst, memory mem) ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. ++// implemented using sc_d on RISCV64. 
++instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) +%{ -+ match(Set dst (LoadL mem)); ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); + -+ ins_cost(LOAD_COST); -+ format %{ "ld $dst, $mem\t# int, #@loadL" %} ++ ins_cost(ALU_COST * 2 + STORE_COST); ++ ++ format %{ ++ "sc_d t1, $newval $heap_top_ptr,\t# ptr store conditional, #@storePConditional" ++ %} + + ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ la(t0, Address(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp)); ++ __ sc_d($cr$$Register, $newval$$Register, t0, Assembler::rl); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(pipe_serial); +%} + -+// Load Range -+instruct loadRange(iRegINoSp dst, memory mem) ++// storeLConditional is used by PhaseMacroExpand::expand_lock_node ++// when attempting to rebias a lock towards the current thread. We ++// must use the acquire form of cmpxchg in order to guarantee acquire ++// semantics in this case. ++instruct storeLConditional(indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) +%{ -+ match(Set dst (LoadRange mem)); ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# range, #@loadRange" %} ++ ins_cost(LOAD_COST + STORE_COST + 2 * BRANCH_COST); ++ ++ format %{ ++ "cmpxchg t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" ++ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeLConditional" ++ %} + + ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); ++ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+// Load Pointer -+instruct loadP(iRegPNoSp dst, memory mem) ++// storeIConditional also has acquire semantics, for no better reason ++// than matching storeLConditional. 
++instruct storeIConditional(indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) +%{ -+ match(Set dst (LoadP mem)); -+ predicate(n->as_Load()->barrier_data() == 0); ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST); -+ format %{ "ld $dst, $mem\t# ptr, #@loadP" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2); ++ ++ format %{ ++ "cmpxchgw t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" ++ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeIConditional" ++ %} + + ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); ++ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+// Load Compressed Pointer -+instruct loadN(iRegNNoSp dst, memory mem) ++// standard CompareAndSwapX when we are using barriers ++// these have higher priority than the rules selected by a predicate ++instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set dst (LoadN mem)); ++ match(Set res (CompareAndSwapB mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# loadN, compressed ptr, #@loadN" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); ++ ++ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); ++ ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB" ++ %} + + ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+// Load Klass Pointer -+instruct loadKlass(iRegPNoSp dst, memory mem) ++instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set dst (LoadKlass mem)); ++ match(Set res (CompareAndSwapS mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST); -+ format %{ "ld $dst, $mem\t# class, #@loadKlass" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); ++ ++ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); ++ ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapS" ++ %} + + ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(iload_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+// Load Narrow Klass Pointer -+instruct loadNKlass(iRegNNoSp dst, memory mem) ++instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ -+ match(Set dst (LoadNKlass mem)); ++ match(Set res (CompareAndSwapI mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# loadNKlass, compressed class ptr, #@loadNKlass" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapI" + %} + -+ ins_pipe(iload_reg_mem); ++ ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+// Load Float -+instruct loadF(fRegF dst, memory mem) ++instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ -+ match(Set dst (LoadF mem)); ++ match(Set res (CompareAndSwapL mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST); -+ format %{ "flw $dst, $mem\t# float, #@loadF" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ flw(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapL" + %} + -+ ins_pipe(fp_load_mem_s); ++ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+// Load Double -+instruct loadD(fRegD dst, memory mem) ++instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ match(Set dst (LoadD mem)); ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST); -+ format %{ "fld $dst, $mem\t# double, #@loadD" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapP" + %} + -+ ins_pipe(fp_load_mem_d); ++ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+// Load Int Constant -+instruct loadConI(iRegINoSp dst, immI src) ++instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ -+ match(Set dst src); ++ match(Set res (CompareAndSwapN mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); -+ format %{ "li $dst, $src\t# int, #@loadConI" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); + -+ ins_encode(riscv_enc_li_imm(dst, src)); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapN" ++ %} + -+ ins_pipe(ialu_imm); ++ ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+// Load Long Constant -+instruct loadConL(iRegLNoSp dst, immL src) ++// alternative CompareAndSwapX when we are eliding barriers ++instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set dst src); -+ -+ ins_cost(ALU_COST); -+ format %{ "li $dst, $src\t# long, #@loadConL" %} ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_encode(riscv_enc_li_imm(dst, src)); ++ match(Set res (CompareAndSwapB mem (Binary oldval newval))); + -+ ins_pipe(ialu_imm); -+%} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); + -+// Load Pointer Constant -+instruct loadConP(iRegPNoSp dst, immP con) -+%{ -+ match(Set dst con); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# ptr, #@loadConP" %} ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq" ++ %} + -+ ins_encode(riscv_enc_mov_p(dst, con)); ++ ins_encode %{ ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ %} + -+ ins_pipe(ialu_imm); ++ ins_pipe(pipe_slow); +%} + -+// Load Null Pointer Constant -+instruct loadConP0(iRegPNoSp dst, immP0 con) ++instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set dst con); -+ -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# NULL ptr, #@loadConP0" %} ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_encode(riscv_enc_mov_zero(dst)); ++ match(Set res (CompareAndSwapS mem (Binary oldval newval))); + -+ ins_pipe(ialu_imm); -+%} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); + -+// Load Pointer Constant One -+instruct loadConP1(iRegPNoSp dst, immP_1 con) -+%{ -+ match(Set dst con); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# load ptr constant one, #@loadConP1" %} ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapSAcq" ++ %} + -+ ins_encode(riscv_enc_mov_p1(dst)); ++ ins_encode %{ ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ %} + -+ ins_pipe(ialu_imm); ++ ins_pipe(pipe_slow); +%} + -+// Load Byte Map Base Constant -+instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) ++instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ -+ match(Set dst con); -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# Byte Map Base, #@loadByteMapBase" %} -+ -+ ins_encode(riscv_enc_mov_byte_map_base(dst)); ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_pipe(ialu_imm); -+%} ++ match(Set res (CompareAndSwapI mem (Binary oldval newval))); + -+// Load Narrow Pointer Constant -+instruct loadConN(iRegNNoSp dst, immN con) -+%{ -+ match(Set dst con); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_cost(ALU_COST * 4); -+ format %{ "mv $dst, $con\t# compressed ptr, #@loadConN" %} ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapIAcq" ++ %} + -+ ins_encode(riscv_enc_mov_n(dst, con)); ++ ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval)); + -+ ins_pipe(ialu_imm); ++ ins_pipe(pipe_slow); +%} + -+// Load Narrow Null Pointer Constant -+instruct loadConN0(iRegNNoSp dst, immN0 con) ++instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ -+ match(Set dst con); ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# compressed NULL ptr, #@loadConN0" %} ++ match(Set res (CompareAndSwapL mem (Binary oldval newval))); + -+ ins_encode(riscv_enc_mov_zero(dst)); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_pipe(ialu_imm); -+%} ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq" ++ %} + -+// Load Narrow Klass Constant -+instruct loadConNKlass(iRegNNoSp dst, immNKlass con) -+%{ -+ match(Set dst con); ++ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); + -+ ins_cost(ALU_COST * 6); -+ format %{ "mv $dst, $con\t# compressed klass ptr, #@loadConNKlass" %} ++ ins_pipe(pipe_slow); ++%} + -+ ins_encode(riscv_enc_mov_nk(dst, con)); ++instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_pipe(ialu_imm); -+%} ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); + -+// Load Float Constant -+instruct loadConF(fRegF dst, immF con) %{ -+ match(Set dst con); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_cost(LOAD_COST); + format %{ -+ "flw $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConF" ++ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapPAcq" + %} + -+ ins_encode %{ -+ __ flw(as_FloatRegister($dst$$reg), $constantaddress($con)); -+ %} ++ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); + -+ ins_pipe(fp_load_constant_s); ++ ins_pipe(pipe_slow); +%} + -+instruct loadConF0(fRegF dst, immF0 con) %{ -+ match(Set dst con); ++instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(XFER_COST); ++ match(Set res (CompareAndSwapN mem (Binary oldval newval))); + -+ format %{ "fmv.w.x $dst, zr\t# float, #@loadConF0" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ fmv_w_x(as_FloatRegister($dst$$reg), zr); ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq" + %} + -+ ins_pipe(fp_load_constant_s); ++ ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+// Load Double Constant -+instruct loadConD(fRegD dst, immD con) %{ -+ match(Set dst con); ++// Sundry CAS operations. Note that release is always true, ++// regardless of the memory ordering of the CAS. This is because we ++// need the volatile case to be sequentially consistent but there is ++// no trailing StoreLoad barrier emitted by C2. Unfortunately we ++// can't check the type of memory ordering here, so we always emit a ++// sc_d(w) with rl bit set. ++instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ ins_cost(LOAD_COST); + format %{ -+ "fld $dst, [$constantaddress]\t# load from constant table: double=$con, #@loadConD" ++ "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB" + %} + + ins_encode %{ -+ __ fld(as_FloatRegister($dst$$reg), $constantaddress($con)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(fp_load_constant_d); ++ ins_pipe(pipe_slow); +%} + -+instruct loadConD0(fRegD dst, immD0 con) %{ -+ match(Set dst con); ++instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + -+ ins_cost(XFER_COST); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); + -+ format %{ "fmv.d.x $dst, zr\t# double, #@loadConD0" %} ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ ins_encode %{ -+ __ fmv_d_x(as_FloatRegister($dst$$reg), zr); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS" + %} + -+ ins_pipe(fp_load_constant_d); -+%} -+ -+// Store Instructions -+// Store CMS card-mark Immediate -+instruct storeimmCM0(immI0 zero, memory mem) 
-+%{ -+ match(Set mem (StoreCM mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "storestore (elided)\n\t" -+ "sb zr, $mem\t# byte, #@storeimmCM0" %} -+ + ins_encode %{ -+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(istore_mem); ++ ins_pipe(pipe_slow); +%} + -+// Store CMS card-mark Immediate with intervening StoreStore -+// needed when using CMS with no conditional card marking -+instruct storeimmCM0_ordered(immI0 zero, memory mem) ++instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ -+ match(Set mem (StoreCM mem zero)); -+ -+ ins_cost(ALU_COST + STORE_COST); -+ format %{ "membar(StoreStore)\n\t" -+ "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %} -+ -+ ins_encode %{ -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); + -+ ins_pipe(istore_mem); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+// Store Byte -+instruct storeB(iRegIorL2I src, memory mem) -+%{ -+ match(Set mem (StoreB mem src)); ++ effect(TEMP_DEF res); + -+ ins_cost(STORE_COST); -+ format %{ "sb $src, $mem\t# byte, #@storeB" %} ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI" ++ %} + + ins_encode %{ -+ __ sb(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(istore_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+instruct storeimmB0(immI0 zero, memory mem) ++instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ -+ match(Set mem (StoreB mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sb zr, $mem\t# byte, #@storeimmB0" %} -+ -+ ins_encode %{ -+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); + -+ ins_pipe(istore_mem); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+// Store Char/Short -+instruct storeC(iRegIorL2I src, memory mem) -+%{ -+ match(Set mem (StoreC mem src)); ++ effect(TEMP_DEF res); + -+ ins_cost(STORE_COST); -+ format %{ "sh $src, $mem\t# short, #@storeC" %} ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL" ++ %} + + ins_encode %{ -+ __ sh(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(istore_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+instruct storeimmC0(immI0 zero, memory mem) ++instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ -+ match(Set mem (StoreC mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sh zr, $mem\t# short, #@storeimmC0" %} -+ -+ ins_encode %{ -+ __ sh(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ 
match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + -+ ins_pipe(istore_mem); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3); + -+// Store Integer -+instruct storeI(iRegIorL2I src, memory mem) -+%{ -+ match(Set mem(StoreI mem src)); ++ effect(TEMP_DEF res); + -+ ins_cost(STORE_COST); -+ format %{ "sw $src, $mem\t# int, #@storeI" %} ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN" ++ %} + + ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(istore_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+instruct storeimmI0(immI0 zero, memory mem) ++instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ match(Set mem(StoreI mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sw zr, $mem\t# int, #@storeimmI0" %} -+ -+ ins_encode %{ -+ __ sw(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + -+ ins_pipe(istore_mem); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+// Store Long (64 bit signed) -+instruct storeL(iRegL src, memory mem) -+%{ -+ match(Set mem (StoreL mem src)); ++ effect(TEMP_DEF res); + -+ ins_cost(STORE_COST); -+ format %{ "sd $src, $mem\t# long, #@storeL" %} ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP" ++ %} + + ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(istore_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+// Store Long (64 bit signed) -+instruct storeimmL0(immL0 zero, memory mem) ++instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set mem (StoreL mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sd zr, $mem\t# long, #@storeimmL0" %} ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_encode %{ -+ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); + -+ ins_pipe(istore_mem); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); + -+// Store Pointer -+instruct storeP(iRegP src, memory mem) -+%{ -+ match(Set mem (StoreP mem src)); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ ins_cost(STORE_COST); -+ format %{ "sd $src, $mem\t# ptr, #@storeP" %} ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq" ++ %} + + ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::aq, /*release*/ 
Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(istore_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+// Store Pointer -+instruct storeimmP0(immP0 zero, memory mem) ++instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set mem (StoreP mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sd zr, $mem\t# ptr, #@storeimmP0" %} ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_encode %{ -+ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + -+ ins_pipe(istore_mem); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); + -+// Store Compressed Pointer -+instruct storeN(iRegN src, memory mem) -+%{ -+ match(Set mem (StoreN mem src)); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ ins_cost(STORE_COST); -+ format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %} ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq" ++ %} + + ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(istore_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) ++instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ -+ match(Set mem (StoreN mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_encode %{ -+ __ sw(as_Register($heapbase$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); + -+ ins_pipe(istore_reg_mem); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+// Store Float -+instruct storeF(fRegF src, memory mem) -+%{ -+ match(Set mem (StoreF mem src)); ++ effect(TEMP_DEF res); + -+ ins_cost(STORE_COST); -+ format %{ "fsw $src, $mem\t# float, #@storeF" %} ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq" ++ %} + + ins_encode %{ -+ __ fsw(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(fp_store_reg_s); ++ ins_pipe(pipe_slow); +%} + -+// Store Double -+instruct storeD(fRegD src, memory mem) ++instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ -+ match(Set mem (StoreD mem src)); -+ -+ ins_cost(STORE_COST); -+ format %{ "fsd $src, $mem\t# double, #@storeD" %} ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), 
$mem$$disp)); -+ %} ++ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); + -+ ins_pipe(fp_store_reg_d); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+// Store Compressed Klass Pointer -+instruct storeNKlass(iRegN src, memory mem) -+%{ -+ match(Set mem (StoreNKlass mem src)); ++ effect(TEMP_DEF res); + -+ ins_cost(STORE_COST); -+ format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %} ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq" ++ %} + + ins_encode %{ -+ Assembler::CompressibleRegion cr(&_masm); -+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(istore_reg_mem); ++ ins_pipe(pipe_slow); +%} + -+// ============================================================================ -+// Atomic operation instructions -+// -+// Intel and SPARC both implement Ideal Node LoadPLocked and -+// Store{PIL}Conditional instructions using a normal load for the -+// LoadPLocked and a CAS for the Store{PIL}Conditional. -+// -+// The ideal code appears only to use LoadPLocked/storePConditional as a -+// pair to lock object allocations from Eden space when not using -+// TLABs. -+// -+// There does not appear to be a Load{IL}Locked Ideal Node and the -+// Ideal code appears to use Store{IL}Conditional as an alias for CAS -+// and to use StoreIConditional only for 32-bit and StoreLConditional -+// only for 64-bit. -+// -+// We implement LoadPLocked and storePConditional instructions using, -+// respectively the RISCV hw load-reserve and store-conditional -+// instructions. Whereas we must implement each of -+// Store{IL}Conditional using a CAS which employs a pair of -+// instructions comprising a load-reserve followed by a -+// store-conditional. -+ -+ -+// Locked-load (load reserved) of the current heap-top -+// used when updating the eden heap top -+// implemented using lr_d on RISCV64 -+instruct loadPLocked(iRegPNoSp dst, indirect mem) ++instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ -+ match(Set dst (LoadPLocked mem)); -+ -+ ins_cost(ALU_COST * 2 + LOAD_COST); -+ -+ format %{ "lr.d $dst, $mem\t# ptr load reserved, #@loadPLocked" %} -+ -+ ins_encode %{ -+ __ la(t0, Address(as_Register($mem$$base), $mem$$disp)); -+ __ lr_d($dst$$Register, t0, Assembler::aq); -+ %} ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_pipe(pipe_serial); -+%} ++ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + -+// Conditional-store of the updated heap-top. -+// Used during allocation of the shared heap. -+// implemented using sc_d on RISCV64. 
-+instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) -+%{ -+ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+ ins_cost(ALU_COST * 2 + STORE_COST); ++ effect(TEMP_DEF res); + + format %{ -+ "sc_d t1, $newval $heap_top_ptr,\t# ptr store conditional, #@storePConditional" ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq" + %} + + ins_encode %{ -+ __ la(t0, Address(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp)); -+ __ sc_d($cr$$Register, $newval$$Register, t0, Assembler::rl); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_slow); +%} + -+instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) ++instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(LOAD_COST + STORE_COST + 2 * BRANCH_COST); ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); + + format %{ -+ "cmpxchg t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" -+ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeLConditional" ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq" + %} + + ins_encode %{ + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); -+ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + -+// storeIConditional also has acquire semantics, for no better reason -+// than matching storeLConditional. 
-+instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) ++instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ -+ "cmpxchgw t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" -+ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeIConditional" ++ "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapB" + %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); -+ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + -+// standard CompareAndSwapX when we are using barriers -+// these have higher priority than the rules selected by a predicate -+instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set res (CompareAndSwapB mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); + -+ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapB" ++ "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapS" + %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ -+ match(Set res (CompareAndSwapS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); ++ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); + -+ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS" ++ "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapI" + %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ -+ match(Set res (CompareAndSwapI mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapI" ++ "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapL" + %} + -+ ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); ++ %} + + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ -+ match(Set res (CompareAndSwapL mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); + + format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapL" ++ "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapN" + %} + -+ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); ++ %} + + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ predicate(n->as_LoadStore()->barrier_data() == 0); -+ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapP" ++ "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapP" + %} + -+ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); ++ %} + + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) ++instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set res (CompareAndSwapN mem (Binary oldval newval))); ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); ++ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapN" ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapBAcq" + %} + -+ ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); ++ %} + + ins_pipe(pipe_slow); +%} + -+// alternative CompareAndSwapX when we are eliding barriers -+instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (CompareAndSwapB mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapBAcq" ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapSAcq" + %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (CompareAndSwapS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); ++ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq" ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapIAcq" + %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (CompareAndSwapI mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapIAcq" ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapLAcq" + %} + -+ ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); ++ %} + + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (CompareAndSwapL mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); + + format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq" ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapNAcq" + %} + -+ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) -+%{ -+ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); -+ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); -+ -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapPAcq" ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); -+ + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) ++instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (CompareAndSwapN mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapNAcq" ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapPAcq" + %} + -+ ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); ++ %} + + ins_pipe(pipe_slow); +%} + -+// Sundry CAS operations. Note that release is always true, -+// regardless of the memory ordering of the CAS. This is because we -+// need the volatile case to be sequentially consistent but there is -+// no trailing StoreLoad barrier emitted by C2. Unfortunately we -+// can't check the type of memory ordering here, so we always emit a -+// sc_d(w) with rl bit set. -+instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) +%{ -+ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); ++ match(Set prev (GetAndSetI mem newv)); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB" -+ %} ++ format %{ "atomic_xchgw $prev, $newv, [$mem]\t#@get_and_setI" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) +%{ -+ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); ++ match(Set prev (GetAndSetL mem newv)); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS" -+ %} ++ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setL" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) +%{ -+ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + 
ALU_COST); ++ match(Set prev (GetAndSetN mem newv)); + -+ effect(TEMP_DEF res); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI" -+ %} ++ format %{ "atomic_xchgwu $prev, $newv, [$mem]\t#@get_and_setN" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ atomic_xchgwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) +%{ -+ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ match(Set prev (GetAndSetP mem newv)); + -+ effect(TEMP_DEF res); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL" -+ %} ++ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setP" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) +%{ -+ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3); ++ match(Set prev (GetAndSetI mem newv)); + -+ effect(TEMP_DEF res); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN" -+ %} ++ format %{ "atomic_xchgw_acq $prev, $newv, [$mem]\t#@get_and_setIAcq" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) +%{ -+ predicate(n->as_LoadStore()->barrier_data() == 0); -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ match(Set prev (GetAndSetL mem newv)); + -+ effect(TEMP_DEF res); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP" -+ %} ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setLAcq" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ atomic_xchgal($prev$$Register, $newv$$Register, 
as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); ++ match(Set prev (GetAndSetN mem newv)); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq" -+ %} ++ format %{ "atomic_xchgwu_acq $prev, $newv, [$mem]\t#@get_and_setNAcq" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ atomic_xchgalwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); ++ match(Set prev (GetAndSetP mem newv)); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq" -+ %} ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setPAcq" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) +%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ match(Set newval (GetAndAddL mem incr)); + -+ effect(TEMP_DEF res); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq" -+ %} ++ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addL" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + 
-+instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) +%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); ++ predicate(n->as_LoadStore()->result_not_used()); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ match(Set dummy (GetAndAddL mem incr)); + -+ effect(TEMP_DEF res); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq" -+ %} ++ format %{ "get_and_addL [$mem], $incr\t#@get_and_addL_no_res" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAdd incr) +%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ match(Set newval (GetAndAddL mem incr)); + -+ effect(TEMP_DEF res); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq" -+ %} ++ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addLi" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAdd incr) +%{ -+ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); -+ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ predicate(n->as_LoadStore()->result_not_used()); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ match(Set dummy (GetAndAddL mem incr)); + -+ effect(TEMP_DEF res); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq" -+ %} ++ format %{ "get_and_addL [$mem], $incr\t#@get_and_addLi_no_res" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) +%{ -+ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); ++ match(Set newval (GetAndAddI mem incr)); + -+ 
effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapB" -+ %} ++ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addI" %} + + ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) +%{ -+ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); ++ predicate(n->as_LoadStore()->result_not_used()); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); ++ match(Set dummy (GetAndAddI mem incr)); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapS" -+ %} ++ format %{ "get_and_addI [$mem], $incr\t#@get_and_addI_no_res" %} + + ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAdd incr) +%{ -+ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); ++ match(Set newval (GetAndAddI mem incr)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapI" -+ %} ++ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addIi" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr) +%{ -+ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); ++ predicate(n->as_LoadStore()->result_not_used()); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ match(Set dummy (GetAndAddI 
mem incr)); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapL" -+ %} ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI [$mem], $incr\t#@get_and_addIi_no_res" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) +%{ -+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ match(Set newval (GetAndAddL mem incr)); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapN" -+ %} ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLAcq" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) -+%{ -+ predicate(n->as_LoadStore()->barrier_data() == 0); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ match(Set dummy (GetAndAddL mem incr)); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapP" -+ %} ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addL_no_resAcq" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); ++ match(Set newval (GetAndAddL mem incr)); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(ALU_COST); + -+ format %{ -+ 
"cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapBAcq" -+ %} ++ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLiAcq" %} + + ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) +%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); ++ match(Set dummy (GetAndAddL mem incr)); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapSAcq" -+ %} ++ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addLi_no_resAcq" %} + + ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); ++ match(Set newval (GetAndAddI mem incr)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapIAcq" -+ %} ++ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIAcq" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) +%{ -+ predicate(needs_acquiring_load_reserved(n)); ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ match(Set res (WeakCompareAndSwapL 
mem (Binary oldval newval))); ++ match(Set dummy (GetAndAddI mem incr)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapLAcq" -+ %} ++ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addI_no_resAcq" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) +%{ + predicate(needs_acquiring_load_reserved(n)); + -+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ++ match(Set newval (GetAndAddI mem incr)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapNAcq" -+ %} ++ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIiAcq" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr) +%{ -+ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ match(Set dummy (GetAndAddI mem incr)); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ins_cost(ALU_COST); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapPAcq" -+ %} ++ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addIi_no_resAcq" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); ++ __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_slow); -+%} -+ -+instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) -+%{ -+ match(Set prev (GetAndSetI mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchgw $prev, $newv, [$mem]\t#@get_and_setI" %} -+ -+ ins_encode %{ -+ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) -+%{ -+ match(Set prev 
(GetAndSetL mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setL" %} -+ -+ ins_encode %{ -+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) -+%{ -+ match(Set prev (GetAndSetN mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchgwu $prev, $newv, [$mem]\t#@get_and_setN" %} -+ -+ ins_encode %{ -+ __ atomic_xchgwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) -+%{ -+ predicate(n->as_LoadStore()->barrier_data() == 0); -+ match(Set prev (GetAndSetP mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setP" %} -+ -+ ins_encode %{ -+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) -+%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set prev (GetAndSetI mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchgw_acq $prev, $newv, [$mem]\t#@get_and_setIAcq" %} -+ -+ ins_encode %{ -+ __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) -+%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set prev (GetAndSetL mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setLAcq" %} -+ -+ ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) -+%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set prev (GetAndSetN mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchgwu_acq $prev, $newv, [$mem]\t#@get_and_setNAcq" %} -+ -+ ins_encode %{ -+ __ atomic_xchgalwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) -+%{ -+ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); -+ -+ match(Set prev (GetAndSetP mem newv)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setPAcq" %} -+ -+ ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) -+%{ -+ match(Set newval (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addL" %} -+ -+ ins_encode %{ -+ __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) -+%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL [$mem], $incr\t#@get_and_addL_no_res" %} -+ -+ ins_encode %{ -+ __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAdd incr) 
-+%{ -+ match(Set newval (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addLi" %} -+ -+ ins_encode %{ -+ __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAdd incr) -+%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL [$mem], $incr\t#@get_and_addLi_no_res" %} -+ -+ ins_encode %{ -+ __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) -+%{ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addI" %} -+ -+ ins_encode %{ -+ __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) -+%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI [$mem], $incr\t#@get_and_addI_no_res" %} -+ -+ ins_encode %{ -+ __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAdd incr) -+%{ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addIi" %} -+ -+ ins_encode %{ -+ __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr) -+%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI [$mem], $incr\t#@get_and_addIi_no_res" %} -+ -+ ins_encode %{ -+ __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) -+%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set newval (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLAcq" %} -+ -+ ins_encode %{ -+ __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); -+ -+ match(Set dummy (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addL_no_resAcq" %} -+ -+ ins_encode %{ -+ __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) -+%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set newval (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLiAcq" %} -+ -+ ins_encode %{ -+ __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct 
get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) -+%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); -+ -+ match(Set dummy (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addLi_no_resAcq" %} -+ -+ ins_encode %{ -+ __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) -+%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIAcq" %} -+ -+ ins_encode %{ -+ __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) -+%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addI_no_resAcq" %} -+ -+ ins_encode %{ -+ __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) -+%{ -+ predicate(needs_acquiring_load_reserved(n)); -+ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIiAcq" %} -+ -+ ins_encode %{ -+ __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr) -+%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); -+ -+ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addIi_no_resAcq" %} -+ -+ ins_encode %{ -+ __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base)); -+ %} -+ -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_serial); +%} + +// ============================================================================ @@ -37285,7 +34798,7 @@ index 00000000000..588887e1d96 +%} + +instruct sqrtF_reg(fRegF dst, fRegF src) %{ -+ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ match(Set dst (SqrtF src)); + + ins_cost(FSQRT_COST); + format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} @@ -37643,7 +35156,6 @@ index 00000000000..588887e1d96 + +instruct membar_storestore() %{ + match(MemBarStoreStore); -+ match(StoreStoreFence); + ins_cost(ALU_COST); + + format %{ "MEMBAR-store-store\t#@membar_storestore" %} @@ -37728,17 +35240,6 @@ index 00000000000..588887e1d96 + ins_pipe(pipe_class_empty); +%} + -+instruct castLL(iRegL dst) -+%{ -+ match(Set dst (CastLL dst)); -+ -+ size(0); -+ format %{ "# castLL of $dst, #@castLL" %} -+ ins_encode(/* empty encoding */); -+ ins_cost(0); -+ ins_pipe(pipe_class_empty); -+%} -+ +instruct castII(iRegI dst) +%{ + match(Set dst (CastII dst)); @@ -37761,39 +35262,6 @@ index 00000000000..588887e1d96 + ins_pipe(pipe_class_empty); +%} + -+instruct castFF(fRegF dst) -+%{ -+ match(Set dst (CastFF dst)); -+ -+ size(0); -+ format %{ "# castFF of $dst" %} -+ ins_encode(/* empty encoding */); -+ ins_cost(0); -+ ins_pipe(pipe_class_empty); -+%} -+ -+instruct castDD(fRegD dst) -+%{ -+ match(Set dst (CastDD dst)); -+ -+ 
size(0); -+ format %{ "# castDD of $dst" %} -+ ins_encode(/* empty encoding */); -+ ins_cost(0); -+ ins_pipe(pipe_class_empty); -+%} -+ -+instruct castVV(vReg dst) -+%{ -+ match(Set dst (CastVV dst)); -+ -+ size(0); -+ format %{ "# castVV of $dst" %} -+ ins_encode(/* empty encoding */); -+ ins_cost(0); -+ ins_pipe(pipe_class_empty); -+%} -+ +// ============================================================================ +// Convert Instructions + @@ -38029,7 +35497,7 @@ index 00000000000..588887e1d96 +// in case of 32bit oops (heap < 4Gb). +instruct convN2I(iRegINoSp dst, iRegN src) +%{ -+ predicate(CompressedOops::shift() == 0); ++ predicate(Universe::narrow_oop_shift() == 0); + match(Set dst (ConvL2I (CastP2X (DecodeN src)))); + + ins_cost(ALU_COST); @@ -38588,7 +36056,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38608,7 +36076,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38667,7 +36135,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38686,7 +36154,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38707,7 +36175,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38727,7 +36195,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38748,7 +36216,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38768,7 +36236,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | 
C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38785,7 +36253,7 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "float_b$cmp $op1, $op2 \t#@cmpF_branch"%} ++ format %{ "float_b$cmp $op1, $op2, $lbl \t#@cmpF_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); @@ -38802,7 +36270,7 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "float_b$cmp $op1, $op2\t#@cmpF_loop"%} ++ format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); @@ -38820,10 +36288,10 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%} ++ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + @@ -38838,10 +36306,10 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%} ++ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + @@ -39113,7 +36581,7 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(BRANCH_COST); -+ format %{ "far_b$cmp $cr, zr, L\t#@far_cmpFlag_branch"%} ++ format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%} + + ins_encode %{ + __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); @@ -39162,7 +36630,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39177,7 +36645,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39220,7 +36688,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39235,7 +36703,7 @@ index 00000000000..588887e1d96 + 
format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39253,7 +36721,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39271,7 +36739,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39289,7 +36757,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39307,7 +36775,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39322,7 +36790,7 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_branch"%} ++ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), @@ -39338,7 +36806,7 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_loop"%} ++ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), @@ -39355,10 +36823,10 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%} ++ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39371,10 +36839,10 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%} ++ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, 
as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39673,10 +37141,8 @@ index 00000000000..588887e1d96 + ins_cost(ALU_COST + BRANCH_COST); + + format %{ -+ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpI\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpI\n\t" ++ %} + + ins_encode %{ + __ enc_cmove($cop$$cmpcode, @@ -39684,7 +37150,7 @@ index 00000000000..588887e1d96 + as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); +%} + +instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) %{ @@ -39692,18 +37158,16 @@ index 00000000000..588887e1d96 + ins_cost(ALU_COST + BRANCH_COST); + + format %{ -+ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpU\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpU\n\t" ++ %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); +%} + +instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) %{ @@ -39711,10 +37175,8 @@ index 00000000000..588887e1d96 + ins_cost(ALU_COST + BRANCH_COST); + + format %{ -+ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpL\n\t" ++ %} + + ins_encode %{ + __ enc_cmove($cop$$cmpcode, @@ -39722,7 +37184,24 @@ index 00000000000..588887e1d96 + as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); ++%} ++ ++instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpUL\n\t" ++ %} ++ ++ ins_encode %{ ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(pipe_class_compare); +%} + +instruct cmovL_cmpL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOp cop) %{ @@ -39730,10 +37209,8 @@ index 00000000000..588887e1d96 + ins_cost(ALU_COST + BRANCH_COST); + + format %{ -+ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpL\n\t" ++ %} + + ins_encode %{ + __ enc_cmove($cop$$cmpcode, @@ -39741,7 +37218,7 @@ index 00000000000..588887e1d96 + as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); +%} + +instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) %{ @@ -39749,38 +37226,51 @@ index 00000000000..588887e1d96 + ins_cost(ALU_COST + BRANCH_COST); + + format %{ -+ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpUL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpUL\n\t" ++ %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode | 
MacroAssembler::unsigned_branch_mask, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); +%} + -+instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src))); ++instruct cmovL_cmpI(iRegLNoSp dst, iRegL src, iRegI op1, iRegI op2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI op1 op2)) (Binary dst src))); + ins_cost(ALU_COST + BRANCH_COST); ++ + format %{ -+ "bneg$cop $op1, $op2\t#@cmovI_cmpUL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpI\n\t" ++ %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); +%} + ++instruct cmovL_cmpU(iRegLNoSp dst, iRegL src, iRegI op1, iRegI op2, cmpOpU cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpU\n\t" ++ %} ++ ++ ins_encode %{ ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(pipe_class_compare); ++%} + +// ============================================================================ +// Procedure Call/Return Instructions @@ -39920,7 +37410,7 @@ index 00000000000..588887e1d96 +instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + @@ -39938,7 +37428,7 @@ index 00000000000..588887e1d96 +instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + @@ -39955,7 +37445,7 @@ index 00000000000..588887e1d96 +instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + @@ -39973,7 +37463,7 @@ index 00000000000..588887e1d96 + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, + rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() 
== StrIntrinsicNode::LU); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + @@ -40119,7 +37609,6 @@ index 00000000000..588887e1d96 + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +%{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + @@ -40133,28 +37622,9 @@ index 00000000000..588887e1d96 +%} + + -+instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) -+%{ -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); -+ -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} -+ ins_encode %{ -+ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, true /* isL */); -+ %} -+ ins_pipe(pipe_class_memory); -+%} -+ +// clearing of an array +instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) +%{ -+ predicate(!UseRVV); + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL cnt, USE_KILL base); + @@ -40174,8 +37644,7 @@ index 00000000000..588887e1d96 + +instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) +%{ -+ predicate(!UseRVV && (uint64_t)n->in(2)->get_long() -+ < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); ++ predicate((uint64_t)n->in(2)->get_long() < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL base, KILL cr); + @@ -40192,7 +37661,7 @@ index 00000000000..588887e1d96 +instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + iRegI_R10 result, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); ++ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + @@ -40208,7 +37677,7 @@ index 00000000000..588887e1d96 +instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + iRegI_R10 result, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); ++ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + @@ -40225,7 +37694,7 @@ index 00000000000..588887e1d96 + iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, + iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); ++ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); + @@ -40242,7 +37711,7 @@ index 00000000000..588887e1d96 + iRegP_R13 tmp1, 
iRegP_R14 tmp2, iRegP_R15 tmp3, + iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); ++ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); + @@ -40455,10 +37924,10 @@ index 00000000000..588887e1d96 +// End: diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad new file mode 100644 -index 00000000000..4488c1c4031 +index 0000000000..7dda004cd3 --- /dev/null +++ b/src/hotspot/cpu/riscv/riscv_b.ad -@@ -0,0 +1,527 @@ +@@ -0,0 +1,466 @@ +// +// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. @@ -40486,88 +37955,12 @@ index 00000000000..4488c1c4031 + +// RISCV Bit-Manipulation Extension Architecture Description File + -+instruct rorI_imm_rvb(iRegINoSp dst, iRegI src, immI shift) %{ -+ predicate(UseRVB); -+ match(Set dst (RotateRight src shift)); -+ -+ format %{ "roriw $dst, $src, ($shift & 0x1f)\t#@rorI_imm_rvb" %} -+ -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x1f); -+ %} -+ -+ ins_pipe(ialu_reg_shift); -+%} -+ -+instruct rorL_imm_rvb(iRegLNoSp dst, iRegL src, immI shift) %{ -+ predicate(UseRVB); -+ match(Set dst (RotateRight src shift)); -+ -+ format %{ "rori $dst, $src, ($shift & 0x3f)\t#@rorL_imm_rvb" %} -+ -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rori(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x3f); -+ %} -+ -+ ins_pipe(ialu_reg_shift); -+%} -+ -+instruct rorI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ -+ predicate(UseRVB); -+ match(Set dst (RotateRight src shift)); -+ -+ format %{ "rorw $dst, $src, $shift\t#@rorI_reg_rvb" %} -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -+ %} -+ ins_pipe(ialu_reg_reg); -+%} -+ -+instruct rorL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ -+ predicate(UseRVB); -+ match(Set dst (RotateRight src shift)); -+ -+ format %{ "ror $dst, $src, $shift\t#@rorL_reg_rvb" %} -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -+ %} -+ ins_pipe(ialu_reg_reg); -+%} -+ -+instruct rolI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ -+ predicate(UseRVB); -+ match(Set dst (RotateLeft src shift)); -+ -+ format %{ "rolw $dst, $src, $shift\t#@rolI_reg_rvb" %} -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -+ %} -+ ins_pipe(ialu_reg_reg); -+%} -+ -+instruct rolL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ -+ predicate(UseRVB); -+ match(Set dst (RotateLeft src shift)); -+ -+ format %{ "rol $dst, $src, $shift\t#@rolL_reg_rvb" %} -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -+ %} -+ ins_pipe(ialu_reg_reg); -+%} -+ +// Convert oop into int for vectors alignment masking -+instruct convP2I_rvb(iRegINoSp dst, iRegP src) %{ -+ predicate(UseRVB); ++instruct convP2I_b(iRegINoSp dst, iRegP src) %{ ++ predicate(UseZba); + match(Set dst (ConvL2I (CastP2X src))); + -+ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_rvb" %} ++ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_b" %} + + ins_cost(ALU_COST); + 
ins_encode %{ @@ -40578,11 +37971,11 @@ index 00000000000..4488c1c4031 +%} + +// byte to int -+instruct convB2I_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ -+ predicate(UseRVB); ++instruct convB2I_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ ++ predicate(UseZbb); + match(Set dst (RShiftI (LShiftI src lshift) rshift)); + -+ format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_rvb" %} ++ format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_b" %} + + ins_cost(ALU_COST); + ins_encode %{ @@ -40593,11 +37986,11 @@ index 00000000000..4488c1c4031 +%} + +// int to short -+instruct convI2S_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ -+ predicate(UseRVB); ++instruct convI2S_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ ++ predicate(UseZbb); + match(Set dst (RShiftI (LShiftI src lshift) rshift)); + -+ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_rvb" %} ++ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_b" %} + + ins_cost(ALU_COST); + ins_encode %{ @@ -40608,11 +38001,11 @@ index 00000000000..4488c1c4031 +%} + +// short to unsigned int -+instruct convS2UI_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ -+ predicate(UseRVB); ++instruct convS2UI_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ ++ predicate(UseZbb); + match(Set dst (AndI src mask)); + -+ format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_rvb" %} ++ format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_b" %} + + ins_cost(ALU_COST); + ins_encode %{ @@ -40623,11 +38016,11 @@ index 00000000000..4488c1c4031 +%} + +// int to unsigned long (zero extend) -+instruct convI2UL_reg_reg_rvb(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ -+ predicate(UseRVB); ++instruct convI2UL_reg_reg_b(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ ++ predicate(UseZba); + match(Set dst (AndL (ConvI2L src) mask)); + -+ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_rvb" %} ++ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_b" %} + + ins_cost(ALU_COST); + ins_encode %{ @@ -40638,12 +38031,12 @@ index 00000000000..4488c1c4031 +%} + +// BSWAP instructions -+instruct bytes_reverse_int_rvb(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseRVB); ++instruct bytes_reverse_int_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); + match(Set dst (ReverseBytesI src)); + + ins_cost(ALU_COST * 2); -+ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_rvb" %} ++ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_b" %} + + ins_encode %{ + __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40652,12 +38045,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg); +%} + -+instruct bytes_reverse_long_rvb(iRegLNoSp dst, iRegL src) %{ -+ predicate(UseRVB); ++instruct bytes_reverse_long_b(iRegLNoSp dst, iRegL src) %{ ++ predicate(UseZbb); + match(Set dst (ReverseBytesL src)); + + ins_cost(ALU_COST); -+ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_rvb" %} ++ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_b" %} + + ins_encode %{ + __ rev8(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40666,12 +38059,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg); +%} + -+instruct bytes_reverse_unsigned_short_rvb(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseRVB); ++instruct bytes_reverse_unsigned_short_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); + match(Set dst (ReverseBytesUS src)); + + ins_cost(ALU_COST * 2); -+ format %{ 
"revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_rvb" %} ++ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_b" %} + + ins_encode %{ + __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40680,12 +38073,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg); +%} + -+instruct bytes_reverse_short_rvb(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseRVB); ++instruct bytes_reverse_short_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); + match(Set dst (ReverseBytesS src)); + + ins_cost(ALU_COST * 2); -+ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_rvb" %} ++ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_b" %} + + ins_encode %{ + __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40695,12 +38088,12 @@ index 00000000000..4488c1c4031 +%} + +// Shift Add Pointer -+instruct shaddP_reg_reg_rvb(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ -+ predicate(UseRVB); ++instruct shaddP_reg_reg_b(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ ++ predicate(UseZba); + match(Set dst (AddP src1 (LShiftL src2 imm))); + + ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_rvb" %} ++ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_b" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), @@ -40713,12 +38106,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg_reg); +%} + -+instruct shaddP_reg_reg_ext_rvb(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ -+ predicate(UseRVB); ++instruct shaddP_reg_reg_ext_b(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ ++ predicate(UseZba); + match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm))); + + ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_rvb" %} ++ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_b" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), @@ -40732,12 +38125,12 @@ index 00000000000..4488c1c4031 +%} + +// Shift Add Long -+instruct shaddL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ -+ predicate(UseRVB); ++instruct shaddL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ ++ predicate(UseZba); + match(Set dst (AddL src1 (LShiftL src2 imm))); + + ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_rvb" %} ++ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_b" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), @@ -40750,12 +38143,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg_reg); +%} + -+instruct shaddL_reg_reg_ext_rvb(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ -+ predicate(UseRVB); ++instruct shaddL_reg_reg_ext_b(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ ++ predicate(UseZba); + match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm))); + + ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_rvb" %} ++ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_b" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), @@ -40769,12 +38162,12 @@ index 00000000000..4488c1c4031 +%} + +// Zeros Count instructions -+instruct countLeadingZerosI_rvb(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseRVB); ++instruct countLeadingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); + match(Set dst (CountLeadingZerosI src)); + + ins_cost(ALU_COST); -+ format %{ "clzw $dst, $src\t#@countLeadingZerosI_rvb" %} ++ format %{ "clzw $dst, 
$src\t#@countLeadingZerosI_b" %} + + ins_encode %{ + __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40783,12 +38176,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg); +%} + -+instruct countLeadingZerosL_rvb(iRegINoSp dst, iRegL src) %{ -+ predicate(UseRVB); ++instruct countLeadingZerosL_b(iRegINoSp dst, iRegL src) %{ ++ predicate(UseZbb); + match(Set dst (CountLeadingZerosL src)); + + ins_cost(ALU_COST); -+ format %{ "clz $dst, $src\t#@countLeadingZerosL_rvb" %} ++ format %{ "clz $dst, $src\t#@countLeadingZerosL_b" %} + + ins_encode %{ + __ clz(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40797,12 +38190,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg); +%} + -+instruct countTrailingZerosI_rvb(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseRVB); ++instruct countTrailingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); + match(Set dst (CountTrailingZerosI src)); + + ins_cost(ALU_COST); -+ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_rvb" %} ++ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_b" %} + + ins_encode %{ + __ ctzw(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40811,12 +38204,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg); +%} + -+instruct countTrailingZerosL_rvb(iRegINoSp dst, iRegL src) %{ -+ predicate(UseRVB); ++instruct countTrailingZerosL_b(iRegINoSp dst, iRegL src) %{ ++ predicate(UseZbb); + match(Set dst (CountTrailingZerosL src)); + + ins_cost(ALU_COST); -+ format %{ "ctz $dst, $src\t#@countTrailingZerosL_rvb" %} ++ format %{ "ctz $dst, $src\t#@countTrailingZerosL_b" %} + + ins_encode %{ + __ ctz(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40826,12 +38219,12 @@ index 00000000000..4488c1c4031 +%} + +// Population Count instructions -+instruct popCountI_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++instruct popCountI_b(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI src)); + + ins_cost(ALU_COST); -+ format %{ "cpopw $dst, $src\t#@popCountI_rvb" %} ++ format %{ "cpopw $dst, $src\t#@popCountI_b" %} + + ins_encode %{ + __ cpopw(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40841,12 +38234,12 @@ index 00000000000..4488c1c4031 +%} + +// Note: Long/bitCount(long) returns an int. 
-+instruct popCountL_rvb(iRegINoSp dst, iRegL src) %{ ++instruct popCountL_b(iRegINoSp dst, iRegL src) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL src)); + + ins_cost(ALU_COST); -+ format %{ "cpop $dst, $src\t#@popCountL_rvb" %} ++ format %{ "cpop $dst, $src\t#@popCountL_b" %} + + ins_encode %{ + __ cpop(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40856,12 +38249,12 @@ index 00000000000..4488c1c4031 +%} + +// Max and Min -+instruct minI_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ predicate(UseRVB); ++instruct minI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ predicate(UseZbb); + match(Set dst (MinI src1 src2)); + + ins_cost(ALU_COST); -+ format %{ "min $dst, $src1, $src2\t#@minI_reg_rvb" %} ++ format %{ "min $dst, $src1, $src2\t#@minI_reg_b" %} + + ins_encode %{ + __ min(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -40870,12 +38263,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg_reg); +%} + -+instruct maxI_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ predicate(UseRVB); ++instruct maxI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ predicate(UseZbb); + match(Set dst (MaxI src1 src2)); + + ins_cost(ALU_COST); -+ format %{ "max $dst, $src1, $src2\t#@maxI_reg_rvb" %} ++ format %{ "max $dst, $src1, $src2\t#@maxI_reg_b" %} + + ins_encode %{ + __ max(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -40885,14 +38278,14 @@ index 00000000000..4488c1c4031 +%} + +// Abs -+instruct absI_reg_rvb(iRegINoSp dst, iRegI src) %{ -+ predicate(UseRVB); ++instruct absI_reg_b(iRegINoSp dst, iRegI src) %{ ++ predicate(UseZbb); + match(Set dst (AbsI src)); + + ins_cost(ALU_COST * 2); + format %{ + "negw t0, $src\n\t" -+ "max $dst, $src, t0\t#@absI_reg_rvb" ++ "max $dst, $src, t0\t#@absI_reg_b" + %} + + ins_encode %{ @@ -40903,14 +38296,14 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg_reg); +%} + -+instruct absL_reg_rvb(iRegLNoSp dst, iRegL src) %{ -+ predicate(UseRVB); ++instruct absL_reg_b(iRegLNoSp dst, iRegL src) %{ ++ predicate(UseZbb); + match(Set dst (AbsL src)); + + ins_cost(ALU_COST * 2); + format %{ + "neg t0, $src\n\t" -+ "max $dst, $src, t0\t#@absL_reg_rvb" ++ "max $dst, $src, t0\t#@absL_reg_b" + %} + + ins_encode %{ @@ -40922,12 +38315,12 @@ index 00000000000..4488c1c4031 +%} + +// And Not -+instruct andnI_reg_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ -+ predicate(UseRVB); ++instruct andnI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ ++ predicate(UseZbb); + match(Set dst (AndI src1 (XorI src2 m1))); + + ins_cost(ALU_COST); -+ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_rvb" %} ++ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_b" %} + + ins_encode %{ + __ andn(as_Register($dst$$reg), @@ -40938,12 +38331,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg_reg); +%} + -+instruct andnL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ -+ predicate(UseRVB); ++instruct andnL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ ++ predicate(UseZbb); + match(Set dst (AndL src1 (XorL src2 m1))); + + ins_cost(ALU_COST); -+ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_rvb" %} ++ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_b" %} + + ins_encode %{ + __ andn(as_Register($dst$$reg), @@ -40955,12 +38348,12 @@ index 00000000000..4488c1c4031 +%} + +// Or Not -+instruct ornI_reg_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ -+ predicate(UseRVB); ++instruct 
ornI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ ++ predicate(UseZbb); + match(Set dst (OrI src1 (XorI src2 m1))); + + ins_cost(ALU_COST); -+ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_rvb" %} ++ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_b" %} + + ins_encode %{ + __ orn(as_Register($dst$$reg), @@ -40971,12 +38364,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg_reg); +%} + -+instruct ornL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ -+ predicate(UseRVB); ++instruct ornL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ ++ predicate(UseZbb); + match(Set dst (OrL src1 (XorL src2 m1))); + + ins_cost(ALU_COST); -+ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_rvb" %} ++ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_b" %} + + ins_encode %{ + __ orn(as_Register($dst$$reg), @@ -40985,6723 +38378,4615 @@ index 00000000000..4488c1c4031 + %} + + ins_pipe(ialu_reg_reg); -+%} -\ No newline at end of file -diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad -new file mode 100644 -index 00000000000..3828e096b21 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/riscv_v.ad -@@ -0,0 +1,2065 @@ -+// -+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2020, Arm Limited. All rights reserved. -+// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. 
-+// -+// -+ -+// RISCV Vector Extension Architecture Description File -+ -+opclass vmemA(indirect); -+ -+source_hpp %{ -+ bool op_vec_supported(int opcode); -+%} -+ -+source %{ -+ -+ static void loadStore(C2_MacroAssembler masm, bool is_store, -+ VectorRegister reg, BasicType bt, Register base) { -+ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -+ masm.vsetvli(t0, x0, sew); -+ if (is_store) { -+ masm.vsex_v(reg, base, sew); -+ } else { -+ masm.vlex_v(reg, base, sew); -+ } -+ } -+ -+ bool op_vec_supported(int opcode) { -+ switch (opcode) { -+ // No multiply reduction instructions -+ case Op_MulReductionVD: -+ case Op_MulReductionVF: -+ case Op_MulReductionVI: -+ case Op_MulReductionVL: -+ // Others -+ case Op_Extract: -+ case Op_ExtractB: -+ case Op_ExtractC: -+ case Op_ExtractD: -+ case Op_ExtractF: -+ case Op_ExtractI: -+ case Op_ExtractL: -+ case Op_ExtractS: -+ case Op_ExtractUB: -+ // Vector API specific -+ case Op_AndReductionV: -+ case Op_OrReductionV: -+ case Op_XorReductionV: -+ case Op_LoadVectorGather: -+ case Op_StoreVectorScatter: -+ case Op_VectorBlend: -+ case Op_VectorCast: -+ case Op_VectorCastB2X: -+ case Op_VectorCastD2X: -+ case Op_VectorCastF2X: -+ case Op_VectorCastI2X: -+ case Op_VectorCastL2X: -+ case Op_VectorCastS2X: -+ case Op_VectorInsert: -+ case Op_VectorLoadConst: -+ case Op_VectorLoadMask: -+ case Op_VectorLoadShuffle: -+ case Op_VectorMaskCmp: -+ case Op_VectorRearrange: -+ case Op_VectorReinterpret: -+ case Op_VectorStoreMask: -+ case Op_VectorTest: -+ return false; -+ default: -+ return UseRVV; -+ } -+ } -+ -+%} + -+definitions %{ -+ int_def VEC_COST (200, 200); +%} + -+// All VEC instructions -+ -+// vector load/store -+instruct loadV(vReg dst, vmemA mem) %{ -+ match(Set dst (LoadVector mem)); -+ ins_cost(VEC_COST); -+ format %{ "vle $dst, $mem\t#@loadV" %} -+ ins_encode %{ -+ VectorRegister dst_reg = as_VectorRegister($dst$$reg); -+ loadStore(C2_MacroAssembler(&cbuf), false, dst_reg, -+ Matcher::vector_element_basic_type(this), as_Register($mem$$base)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// AndI 0b0..010..0 + ConvI2B ++instruct convI2Bool_andI_reg_immIpowerOf2(iRegINoSp dst, iRegIorL2I src, immIpowerOf2 mask) %{ ++ predicate(UseZbs); ++ match(Set dst (Conv2B (AndI src mask))); ++ ins_cost(ALU_COST); + -+instruct storeV(vReg src, vmemA mem) %{ -+ match(Set mem (StoreVector mem src)); -+ ins_cost(VEC_COST); -+ format %{ "vse $src, $mem\t#@storeV" %} ++ format %{ "bexti $dst, $src, $mask\t#@convI2Bool_andI_reg_immIpowerOf2" %} + ins_encode %{ -+ VectorRegister src_reg = as_VectorRegister($src$$reg); -+ loadStore(C2_MacroAssembler(&cbuf), true, src_reg, -+ Matcher::vector_element_basic_type(this, $src), as_Register($mem$$base)); ++ __ bexti($dst$$Register, $src$$Register, exact_log2((juint)($mask$$constant))); + %} -+ ins_pipe(pipe_slow); -+%} + -+// vector abs -+ -+instruct vabsB(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVB src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t" -+ "vmax.vv $dst, $tmp, $src" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg_reg); +%} +\ No newline at end of file +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +new file mode 100644 +index 0000000000..f41a496093 
+--- /dev/null ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -0,0 +1,2666 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+instruct vabsS(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVS src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t" -+ "vmax.vv $dst, $tmp, $src" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "logging/log.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/align.hpp" ++#include "utilities/formatBuffer.hpp" ++#include "vmreg_riscv.inline.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" ++#endif ++#ifdef COMPILER2 ++#include "adfiles/ad_riscv.hpp" ++#include "opto/runtime.hpp" ++#endif + -+instruct vabsI(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVI src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t" -+ "vmax.vv $dst, $tmp, $src" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++#define __ masm-> + -+instruct vabsL(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVL src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi 
$tmp, 0, $src\t#@vabsL\n\t" -+ "vmax.vv $dst, $tmp, $src" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; + -+instruct vabsF(vReg dst, vReg src) %{ -+ match(Set dst (AbsVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++class SimpleRuntimeFrame { ++public: + -+instruct vabsD(vReg dst, vReg src) %{ -+ match(Set dst (AbsVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Most of the runtime stubs have this simple frame layout. ++ // This class exists to make the layout shared in one place. ++ // Offsets are for compiler stack slots, which are jints. ++ enum layout { ++ // The frame sender code expects that fp will be in the "natural" place and ++ // will override any oopMap setting for it. We must therefore force the layout ++ // so that it agrees with the frame sender code. ++ // we don't expect any arg reg save area so riscv asserts that ++ // frame::arg_reg_save_area_bytes == 0 ++ fp_off = 0, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++}; + -+// vector add ++class RegisterSaver { ++ public: ++ RegisterSaver() {} ++ ~RegisterSaver() {} ++ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); ++ void restore_live_registers(MacroAssembler* masm); + -+instruct vaddB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Offsets into the register save area ++ // Used by deoptimization when it is managing result register ++ // values on its own ++ // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) ++ // |---f0---|<---SP ++ // |---f1---| ++ // | .. | ++ // |---f31--| ++ // |---reserved slot for stack alignment---| ++ // |---x5---| ++ // | x6 | ++ // |---.. 
--| ++ // |---x31--| ++ // |---fp---| ++ // |---ra---| ++ int f0_offset_in_bytes(void) { ++ return 0; ++ } ++ int reserved_slot_offset_in_bytes(void) { ++ return f0_offset_in_bytes() + ++ FloatRegisterImpl::max_slots_per_register * ++ FloatRegisterImpl::number_of_registers * ++ BytesPerInt; ++ } + -+instruct vaddS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ int reg_offset_in_bytes(Register r) { ++ assert (r->encoding() > 4, "ra, sp, gp and tp not saved"); ++ return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize; ++ } + -+instruct vaddI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ int freg_offset_in_bytes(FloatRegister f) { ++ return f0_offset_in_bytes() + f->encoding() * wordSize; ++ } + -+instruct vaddL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ int ra_offset_in_bytes(void) { ++ return reserved_slot_offset_in_bytes() + ++ (RegisterImpl::number_of_registers - 3) * ++ RegisterImpl::max_slots_per_register * ++ BytesPerInt; ++ } ++}; + -+instruct vaddF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { ++ assert_cond(masm != NULL && total_frame_words != NULL); ++ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ *total_frame_words = frame_size_in_words; + -+instruct vaddD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Save Integer and Float registers. ++ __ enter(); ++ __ push_CPU_state(); + -+// vector and ++ // Set an oopmap for the call site. 
This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. + -+instruct vand(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AndV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vand.vv $dst, $src1, $src2\t#@vand" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vand_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); ++ assert_cond(oop_maps != NULL && oop_map != NULL); + -+// vector or ++ int sp_offset_in_slots = 0; ++ int step_in_slots = 0; + -+instruct vor(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (OrV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vor.vv $dst, $src1, $src2\t#@vor" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vor_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ step_in_slots = FloatRegisterImpl::max_slots_per_register; ++ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { ++ FloatRegister r = as_FloatRegister(i); ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); ++ } + -+// vector xor ++ step_in_slots = RegisterImpl::max_slots_per_register; ++ // skip the slot reserved for alignment, see MacroAssembler::push_reg; ++ // also skip x5 ~ x6 on the stack because they are caller-saved registers. ++ sp_offset_in_slots += RegisterImpl::max_slots_per_register * 3; ++ // besides, we ignore x0 ~ x4 because push_CPU_state won't push them on the stack. ++ for (int i = 7; i < RegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { ++ Register r = as_Register(i); ++ if (r != xthread) { ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots + additional_frame_slots), r->as_VMReg()); ++ } ++ } + -+instruct vxor(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (XorV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vxor_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ return oop_map; ++} + -+// vector float div ++void RegisterSaver::restore_live_registers(MacroAssembler* masm) { ++ assert_cond(masm != NULL); ++ __ pop_CPU_state(); ++ __ leave(); ++} + -+instruct vdivF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (DivVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfdiv_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Is vector's size (in bytes) bigger than a size saved by default? 
++bool SharedRuntime::is_wide_vector(int size) { ++ return false; ++} + -+instruct vdivD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (DivVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfdiv_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++size_t SharedRuntime::trampoline_size() { ++ return 6 * NativeInstruction::instruction_size; ++} + -+// vector integer max/min ++void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, destination, offset); ++ __ jalr(x0, t0, offset); ++} + -+instruct vmax(vReg dst, vReg src1, vReg src2) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && -+ n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); -+ match(Set dst (MaxV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmax.vv $dst, $src1, $src2\t#@vmax" %} -+ ins_encode %{ -+ BasicType bt = Matcher::vector_element_basic_type(this); -+ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -+ __ vsetvli(t0, x0, sew); -+ __ vmax_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and ra ++ // This should really be in_preserve_stack_slots ++ return r->reg2stack() * VMRegImpl::stack_slot_size; ++} + -+instruct vmin(vReg dst, vReg src1, vReg src2) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && -+ n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); -+ match(Set dst (MinV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmin.vv $dst, $src1, $src2\t#@vmin" %} -+ ins_encode %{ -+ BasicType bt = Matcher::vector_element_basic_type(this); -+ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -+ __ vsetvli(t0, x0, sew); -+ __ vmin_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} + -+// vector float-point max/min ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than VMRegImpl::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 64-bit ++// integer registers. 
+ -+instruct vmaxF(vReg dst, vReg src1, vReg src2) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -+ match(Set dst (MaxV src1 src2)); -+ effect(TEMP_DEF dst); -+ ins_cost(VEC_COST); -+ format %{ "vmaxF $dst, $src1, $src2\t#@vmaxF" %} -+ ins_encode %{ -+ __ minmax_FD_v(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -+ false /* is_double */, false /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Note: the INPUTS in sig_bt are in units of Java argument words, ++// which are 64-bit. The OUTPUTS are in 32-bit units. + -+instruct vmaxD(vReg dst, vReg src1, vReg src2) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -+ match(Set dst (MaxV src1 src2)); -+ effect(TEMP_DEF dst); -+ ins_cost(VEC_COST); -+ format %{ "vmaxD $dst, $src1, $src2\t#@vmaxD" %} -+ ins_encode %{ -+ __ minmax_FD_v(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -+ true /* is_double */, false /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// The Java calling convention is a "shifted" version of the C ABI. ++// By skipping the first C ABI register we can call non-static jni ++// methods with small numbers of arguments without having to shuffle ++// the arguments at all. Since we control the java ABI we ought to at ++// least get some advantage out of it. + -+instruct vminF(vReg dst, vReg src1, vReg src2) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -+ match(Set dst (MinV src1 src2)); -+ effect(TEMP_DEF dst); -+ ins_cost(VEC_COST); -+ format %{ "vminF $dst, $src1, $src2\t#@vminF" %} -+ ins_encode %{ -+ __ minmax_FD_v(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -+ false /* is_double */, true /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ // Create the mapping between argument positions and ++ // registers. 
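Editorial note, not part of the patch: the comments above describe how java_calling_convention maps the flattened Java signature onto j_rarg0..7 / j_farg0..7 and 4-byte stack slots. A minimal standalone sketch of that counting logic, using plain labels instead of VMRegPairs and assuming 8 integer and 8 FP argument registers as in the code below:

    #include <cstdio>

    // Simplified model of the register-vs-stack assignment performed by
    // java_calling_convention; register names are just labels here.
    enum Kind { INT_LIKE, LONG, FLOAT_ARG, DOUBLE_ARG, VOID_HALF };

    int main() {
      // Example flattened signature: (long, int, double, Object)
      // -> T_LONG, T_VOID, T_INT, T_DOUBLE, T_VOID, T_OBJECT
      Kind sig[] = { LONG, VOID_HALF, INT_LIKE, DOUBLE_ARG, VOID_HALF, INT_LIKE };
      const int n_int_regs = 8, n_fp_regs = 8;   // j_rarg0..7, j_farg0..7
      int int_args = 0, fp_args = 0, stk_args = 0;

      for (Kind k : sig) {
        switch (k) {
          case VOID_HALF:            // upper half of a long/double: no location
            break;
          case INT_LIKE: case LONG:  // ints, oops and longs share the integer registers
            if (int_args < n_int_regs) { printf("j_rarg%d\n", int_args++); }
            else { printf("stack slot %d\n", stk_args); stk_args += 2; }
            break;
          case FLOAT_ARG: case DOUBLE_ARG:  // floats/doubles use the FP registers first
            if (fp_args < n_fp_regs) { printf("j_farg%d\n", fp_args++); }
            else { printf("stack slot %d\n", stk_args); stk_args += 2; }
            break;
        }
      }
      // The real function returns align_up(stk_args, 2): outgoing stack slots needed.
      printf("outgoing stack slots: %d\n", stk_args);
      return 0;
    }
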
++ static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { ++ j_rarg0, j_rarg1, j_rarg2, j_rarg3, ++ j_rarg4, j_rarg5, j_rarg6, j_rarg7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { ++ j_farg0, j_farg1, j_farg2, j_farg3, ++ j_farg4, j_farg5, j_farg6, j_farg7 ++ }; + -+instruct vminD(vReg dst, vReg src1, vReg src2) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -+ match(Set dst (MinV src1 src2)); -+ effect(TEMP_DEF dst); -+ ins_cost(VEC_COST); -+ format %{ "vminD $dst, $src1, $src2\t#@vminD" %} -+ ins_encode %{ -+ __ minmax_FD_v(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -+ true /* is_double */, true /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time + -+// vector fmla ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_BOOLEAN: // fall through ++ case T_CHAR: // fall through ++ case T_BYTE: // fall through ++ case T_SHORT: // fall through ++ case T_INT: ++ if (int_args < Argument::n_int_register_parameters_j) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_LONG: // fall through ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ case T_OBJECT: // fall through ++ case T_ARRAY: // fall through ++ case T_ADDRESS: ++ if (int_args < Argument::n_int_register_parameters_j) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters_j) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters_j) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ return align_up(stk_args, 2); ++} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Patch the callers callsite with entry to compiled code if it 
exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ assert_cond(masm != NULL); ++ Label L; ++ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); ++ __ beqz(t0, L); + -+// vector fmls ++ __ enter(); ++ __ push_CPU_state(); + -+// dst_src1 = dst_src1 + -src2 * src3 -+// dst_src1 = dst_src1 + src2 * -src3 -+instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // VM needs caller's callsite ++ // VM needs target method ++ // This needs to be a long call since we will relocate this adapter to ++ // the codeBuffer and it may not reach + -+// dst_src1 = dst_src1 + -src2 * src3 -+// dst_src1 = dst_src1 + src2 * -src3 -+instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif + -+// vector fnmla ++ __ mv(c_rarg0, xmethod); ++ __ mv(c_rarg1, ra); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); ++ __ jalr(x1, t0, offset); + -+// dst_src1 = -dst_src1 + -src2 * src3 -+// dst_src1 = -dst_src1 + src2 * -src3 -+instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ pop_CPU_state(); ++ // restore sp ++ __ leave(); ++ __ bind(L); ++} + -+// dst_src1 = -dst_src1 + -src2 * src3 -+// dst_src1 = -dst_src1 + src2 * -src3 -+instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. 
We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ patch_callers_callsite(masm); + -+// vector fnmls ++ __ bind(skip_fixup); + -+// dst_src1 = -dst_src1 + src2 * src3 -+instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ int words_pushed = 0; + -+// dst_src1 = -dst_src1 + src2 * src3 -+instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Since all args are passed on the stack, total_args_passed * ++ // Interpreter::stackElementSize is the space we need. + -+// vector mla ++ int extraspace = total_args_passed * Interpreter::stackElementSize; + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ mv(x30, sp); + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // stack is aligned, keep it that way ++ extraspace = align_up(extraspace, 2 * wordSize); + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ if (extraspace) { ++ __ sub(sp, sp, extraspace); ++ } + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Now write the args into the 
outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); ++ continue; ++ } + -+// vector mls ++ // offset to start parameters ++ int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; ++ int next_off = st_off - Interpreter::stackElementSize; + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Say 4 args: ++ // i st_off ++ // 0 32 T_LONG ++ // 1 24 T_VOID ++ // 2 16 T_OBJECT ++ // 3 8 T_BOOL ++ // - 0 return address ++ // ++ // However to make thing extra confusing. Because we can fit a Java long/double in ++ // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter ++ // leaves one slot empty and only stores to a single slot. In this case the ++ // slot that is occupied is the T_VOID slot. See I said it was confusing. + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use t0 ++ int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size ++ + extraspace ++ + words_pushed * wordSize); ++ if (!r_2->is_valid()) { ++ __ lwu(t0, Address(sp, ld_off)); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++ } else { ++ __ ld(t0, Address(sp, ld_off), /*temp register*/esp); + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG ++ // T_DOUBLE and T_LONG use two slots in the interpreter ++ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { ++ // ld_off == LSW, ld_off+wordSize == MSW ++ // st_off == MSW, next_off == LSW ++ __ sd(t0, Address(sp, next_off), /*temp register*/esp); ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ li(t0, 0xdeadffffdeadaaaaul); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++#endif /* ASSERT */ ++ } else { ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++ } ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ // must be only an int (or less ) so move only 32bits to slot ++ __ sd(r, Address(sp, st_off)); ++ } else { ++ // Two VMREgs|OptoRegs can be T_OBJECT, 
T_ADDRESS, T_DOUBLE, T_LONG ++ // T_DOUBLE and T_LONG use two slots in the interpreter ++ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { ++ // long/double in gpr ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ li(t0, 0xdeadffffdeadaaabul); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++#endif /* ASSERT */ ++ __ sd(r, Address(sp, next_off)); ++ } else { ++ __ sd(r, Address(sp, st_off)); ++ } ++ } ++ } else { ++ assert(r_1->is_FloatRegister(), ""); ++ if (!r_2->is_valid()) { ++ // only a float use just part of the slot ++ __ fsw(r_1->as_FloatRegister(), Address(sp, st_off)); ++ } else { ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ li(t0, 0xdeadffffdeadaaacul); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++#endif /* ASSERT */ ++ __ fsd(r_1->as_FloatRegister(), Address(sp, next_off)); ++ } ++ } ++ } + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ mv(esp, sp); // Interp expects args on caller's expression stack + -+// vector mul ++ __ ld(t0, Address(xmethod, in_bytes(Method::interpreter_entry_offset()))); ++ __ jr(t0); ++} + -+instruct vmulB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ // Cut-out for having no stack args. ++ int comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord; ++ if (comp_args_on_stack != 0) { ++ __ sub(t0, sp, comp_words_on_stack * wordSize); ++ __ andi(sp, t0, -16); ++ } + -+instruct vmulS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_offset()))); + -+instruct vmulI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Now generate the shuffle code. 
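Editorial note, not part of the patch: the offset arithmetic used by gen_c2i_adapter above can be checked in isolation. A small standalone sketch, assuming Interpreter::stackElementSize is one 8-byte word (true on 64-bit builds), showing st_off/next_off for the four-argument example discussed in the comment and that the 64-bit value of a long/double lands in next_off (the slot belonging to the trailing T_VOID):

    #include <cstdio>

    int main() {
      const int stackElementSize = 8;     // assumed value of Interpreter::stackElementSize
      const int total_args_passed = 4;
      const char* sig[] = { "T_LONG", "T_VOID", "T_OBJECT", "T_BOOL" };

      for (int i = 0; i < total_args_passed; i++) {
        // Same formulas as the adapter code above.
        int st_off   = (total_args_passed - i - 1) * stackElementSize;
        int next_off = st_off - stackElementSize;
        printf("%-9s st_off=%2d next_off=%2d\n", sig[i], st_off, next_off);
      }
      // For T_LONG at i=0 the value is stored at next_off, while the st_off slot is
      // left unused (filled with 0xdeadffffdeadaaaa junk in ASSERT builds).
      return 0;
    }
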
++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); ++ continue; ++ } + -+instruct vmulL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Pick up 0, 1 or 2 words from SP+offset. + -+instruct vmulF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), ++ "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; + -+instruct vmulD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to account for return address ) ++ int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size; ++ if (!r_2->is_valid()) { ++ __ lw(t0, Address(esp, ld_off)); ++ __ sd(t0, Address(sp, st_off), /*temp register*/t2); ++ } else { ++ // ++ // We are using two optoregs. This can be either T_OBJECT, ++ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates ++ // two slots but only uses one for thr T_LONG or T_DOUBLE case ++ // So we must adjust where to pick up the data to match the ++ // interpreter. ++ // ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address + -+// vector fneg ++ // ld_off is MSW so get LSW ++ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? ++ next_off : ld_off; ++ __ ld(t0, Address(esp, offset)); ++ // st_off is LSW (i.e. reg.first()) ++ __ sd(t0, Address(sp, st_off), /*temp register*/t2); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // ++ // We are using two VMRegs. This can be either T_OBJECT, ++ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates ++ // two slots but only uses one for thr T_LONG or T_DOUBLE case ++ // So we must adjust where to pick up the data to match the ++ // interpreter. 
+ -+instruct vnegF(vReg dst, vReg src) %{ -+ match(Set dst (NegVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? ++ next_off : ld_off; + -+instruct vnegD(vReg dst, vReg src) %{ -+ match(Set dst (NegVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // this can be a misaligned move ++ __ ld(r, Address(esp, offset)); ++ } else { ++ // sign extend and use a full word? ++ __ lw(r, Address(esp, ld_off)); ++ } ++ } else { ++ if (!r_2->is_valid()) { ++ __ flw(r_1->as_FloatRegister(), Address(esp, ld_off)); ++ } else { ++ __ fld(r_1->as_FloatRegister(), Address(esp, next_off)); ++ } ++ } ++ } + -+// popcount vector ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. + -+instruct vpopcountI(iRegINoSp dst, vReg src) %{ -+ match(Set dst (PopCountVI src)); -+ format %{ "vpopc.m $dst, $src\t#@vpopcountI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset())); + -+// vector add reduction ++ __ jr(t1); ++} + -+instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + -+instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addS\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} -+ ins_encode %{ 
-+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; + -+instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ Label ok; + -+instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (AddReductionVL src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ const Register holder = t1; ++ const Register receiver = j_rarg0; ++ const Register tmp = t2; // A call-clobbered register not used for arg passing + -+instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{ -+ match(Set src1_dst (AddReductionVF src1_dst src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t" -+ "vfredosum.vs $tmp, $src2, $tmp\n\t" -+ "vfmv.f.s $src1_dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -+ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know xmethod holds the Method* during calls ++ // to the interpreter. The args start out packed in the compiled layout. They ++ // need to be unpacked into the interpreter layout. This will almost always ++ // require some stack space. We grow the current (compiled) stack, then repack ++ // the args. We finally end in a jump to the generic interpreter entry point. ++ // On exit from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). 
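Editorial note, not part of the patch: generate_i2c2i_adapters produces one adapter blob with three entry points, and which one a caller lands on depends on the direction of the call. A simplified picture with illustrative names only:

    #include <cstdio>

    // Roles of the three entry points returned via AdapterHandlerLibrary::new_entry.
    struct AdapterEntryPoints {
      const char* i2c_entry;            // interpreted caller -> compiled callee:
                                        // shuffle args from the interpreter's
                                        // expression stack into the compiled layout
      const char* c2i_entry;            // compiled caller -> interpreted callee:
                                        // unpack register/stack args onto the
                                        // interpreter's expression stack
      const char* c2i_unverified_entry; // like c2i_entry, but first checks the
                                        // receiver klass against the inline-cache
                                        // holder and bails to the ic_miss stub
    };

    int main() {
      AdapterEntryPoints e = { "i2c", "c2i", "c2i_unverified" };
      printf("%s / %s / %s\n", e.i2c_entry, e.c2i_entry, e.c2i_unverified_entry);
      return 0;
    }
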
+ -+instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{ -+ match(Set src1_dst (AddReductionVD src1_dst src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t" -+ "vfredosum.vs $tmp, $src2, $tmp\n\t" -+ "vfmv.f.s $src1_dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -+ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ { ++ __ block_comment("c2i_unverified_entry {"); ++ __ load_klass(t0, receiver); ++ __ ld(tmp, Address(holder, CompiledICHolder::holder_klass_offset())); ++ __ ld(xmethod, Address(holder, CompiledICHolder::holder_metadata_offset())); ++ __ beq(t0, tmp, ok); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + -+// vector integer max reduction -+instruct vreduce_maxB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (MaxReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_maxB $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ Label Ldone; -+ __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vreduce_maxS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -+ match(Set dst (MaxReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_maxS $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ Label Ldone; -+ __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (MaxReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_maxI $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (MaxReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_maxL $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ 
vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+// vector integer min reduction -+instruct vreduce_minB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (MinReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_minB $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ Label Ldone; -+ __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vreduce_minS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -+ match(Set dst (MinReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_minS $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ Label Ldone; -+ __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (MinReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_minI $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (MinReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_minL $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ bind(ok); ++ // Method might have been compiled since the call site was patched to ++ // interpreted; if that is the case treat it as a miss so we can get ++ // the call site corrected. 
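Editorial note, not part of the patch: the c2i_unverified_entry logic around this point reduces to two checks before falling into the verified c2i path. A standalone restatement in plain C++ (types and names are illustrative; the real code compares Klass* values loaded from the receiver and the CompiledICHolder, and reads Method::code()):

    #include <cstdio>

    struct ICHolder { const void* holder_klass; const void* holder_metadata; };

    enum class Target { IC_MISS_STUB, C2I_ENTRY };

    Target c2i_unverified(const void* receiver_klass, const ICHolder& holder,
                          bool callee_has_compiled_code) {
      if (receiver_klass != holder.holder_klass) {
        return Target::IC_MISS_STUB;   // wrong receiver type: let the IC be repatched
      }
      if (callee_has_compiled_code) {
        return Target::IC_MISS_STUB;   // method got compiled meanwhile: correct the call site
      }
      return Target::C2I_ENTRY;        // fall through into the real c2i adapter
    }

    int main() {
      int k1 = 0, k2 = 0;
      ICHolder h = { &k1, nullptr };
      printf("%d\n", (int)c2i_unverified(&k2, h, false)); // mismatch -> IC_MISS_STUB
      printf("%d\n", (int)c2i_unverified(&k1, h, false)); // match, still interpreted -> C2I_ENTRY
      return 0;
    }
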
++ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); ++ __ beqz(t0, skip_fixup); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ __ block_comment("} c2i_unverified_entry"); ++ } + -+// vector float max reduction ++ address c2i_entry = __ pc(); + -+instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -+ match(Set dst (MaxReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -+ format %{ "reduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %} -+ ins_encode %{ -+ __ reduce_minmax_FD_v($dst$$FloatRegister, -+ $src1$$FloatRegister, as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -+ false /* is_double */, false /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + -+instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -+ match(Set dst (MaxReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -+ format %{ "reduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %} -+ ins_encode %{ -+ __ reduce_minmax_FD_v($dst$$FloatRegister, -+ $src1$$FloatRegister, as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -+ true /* is_double */, false /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); ++} + -+// vector float min reduction ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on riscv"); + -+instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -+ match(Set dst (MinReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -+ format %{ "reduce_minF $dst, $src1, $src2, $tmp1, $tmp2" %} -+ ins_encode %{ -+ __ reduce_minmax_FD_v($dst$$FloatRegister, -+ $src1$$FloatRegister, as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -+ false /* is_double */, true /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. 
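Editorial note, not part of the patch: a quick way to read the comment above is that c_calling_convention only has to report how many 4-byte outgoing stack slots the overflow arguments need. A minimal counting sketch, assuming 8 integer C argument registers (c_rarg0..7) and ten pointer-sized arguments; any final alignment of the returned value is ignored here:

    #include <cstdio>

    int main() {
      const int n_int_register_parameters_c = 8;
      int int_args = 0, stk_args = 0;

      const int total_args_passed = 10;      // e.g. ten pointer-sized native arguments
      for (int i = 0; i < total_args_passed; i++) {
        if (int_args < n_int_register_parameters_c) {
          int_args++;                        // goes in c_rarg0..c_rarg7
        } else {
          stk_args += 2;                     // overflows: two VMRegImpl slots per argument
        }
      }
      printf("register args: %d, stack slots reserved: %d\n", int_args, stk_args);
      // -> register args: 8, stack slots reserved: 4
      return 0;
    }
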
+ -+instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -+ match(Set dst (MinReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -+ format %{ "reduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %} -+ ins_encode %{ -+ __ reduce_minmax_FD_v($dst$$FloatRegister, -+ $src1$$FloatRegister, as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -+ true /* is_double */, true /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { ++ c_rarg0, c_rarg1, c_rarg2, c_rarg3, ++ c_rarg4, c_rarg5, c_rarg6, c_rarg7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { ++ c_farg0, c_farg1, c_farg2, c_farg3, ++ c_farg4, c_farg5, c_farg6, c_farg7 ++ }; + -+// vector Math.rint, floor, ceil ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time + -+instruct vroundD(vReg dst, vReg src, immI rmode) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -+ match(Set dst (RoundDoubleModeV src rmode)); -+ format %{ "vroundD $dst, $src, $rmode" %} -+ ins_encode %{ -+ switch ($rmode$$constant) { -+ case RoundDoubleModeNode::rmode_rint: -+ __ csrwi(CSR_FRM, C2_MacroAssembler::rne); -+ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_BOOLEAN: // fall through ++ case T_CHAR: // fall through ++ case T_BYTE: // fall through ++ case T_SHORT: // fall through ++ case T_INT: ++ if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: // fall through ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ case T_OBJECT: // fall through ++ case T_ARRAY: // fall through ++ case T_ADDRESS: // fall through ++ case T_METADATA: ++ if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters_c) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } + break; -+ case RoundDoubleModeNode::rmode_floor: -+ __ csrwi(CSR_FRM, C2_MacroAssembler::rdn); -+ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ case T_DOUBLE: ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters_c) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } + break; -+ case RoundDoubleModeNode::rmode_ceil: -+ __ csrwi(CSR_FRM, C2_MacroAssembler::rup); -+ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && 
(sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); + break; + default: + ShouldNotReachHere(); -+ break; + } -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+// vector replicate -+ -+instruct replicateB(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateB src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct replicateS(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateS src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct replicateI(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateI src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct replicateL(vReg dst, iRegL src) %{ -+ match(Set dst (ReplicateL src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct replicateB_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateB con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct replicateS_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateS con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct replicateI_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateI con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateI_imm5" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct replicateL_imm5(vReg dst, immL5 con) %{ -+ match(Set dst (ReplicateL con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateL_imm5" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct replicateF(vReg dst, fRegF src) %{ -+ match(Set dst (ReplicateF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.v.f $dst, $src\t#@replicateF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct replicateD(vReg dst, fRegD src) %{ -+ match(Set dst (ReplicateD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.v.f $dst, $src\t#@replicateD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+// vector shift -+ -+instruct vasrB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVB src shift)); -+ 
ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t" -+ "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ BitsPerByte - 1, Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vasrS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t" -+ "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ BitsPerShort - 1, Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vasrI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vasrL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vlslB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect( TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vlslS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > 
BitsPerShort - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ } + -+instruct vlslI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ return stk_args; ++} + -+instruct vlslL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// On 64 bit we will store integer like items to the stack as ++// 64 bits items (riscv64 abi) even though java would only store ++// 32bits for a parameter. On 32bit it will simply be 32 bits ++// So this routine will do 32->32 on 32bit and 32->64 on 64bit ++static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ __ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ // 32bits extend sign ++ __ addw(dst.first()->as_Register(), src.first()->as_Register(), zr); ++ } ++ } ++} + -+instruct vlsrB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ assert_cond(masm != NULL && map != NULL && receiver_offset != NULL); ++ // must pass a handle. First figure out the location we use as a handle ++ Register rHandle = dst.first()->is_stack() ? 
t1 : dst.first()->as_Register(); + -+instruct vlsrS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // See if oop is NULL if it is we need no handle + ++ if (src.first()->is_stack()) { + -+instruct vlsrI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Oop is already on the stack as an argument ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } + ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ la(rHandle, Address(fp, reg2offset_in(src.first()))); ++ // conditionally move a NULL ++ Label notZero1; ++ __ bnez(t0, notZero1); ++ __ mv(rHandle, zr); ++ __ bind(notZero1); ++ } else { + -+instruct vlsrL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles and pass a handle if oop is non-NULL + -+instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVB src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++ const Register rOop = src.first()->as_Register(); ++ int oop_slot = -1; ++ if (rOop == j_rarg0) { ++ oop_slot = 0; ++ } else if (rOop == j_rarg1) { ++ oop_slot = 1; ++ } else if (rOop == j_rarg2) { ++ oop_slot = 2; ++ } else if (rOop == j_rarg3) { ++ oop_slot = 3; ++ } else if (rOop == j_rarg4) { ++ oop_slot = 4; ++ } else if (rOop == j_rarg5) { ++ oop_slot = 5; ++ } else if (rOop == j_rarg6) { ++ oop_slot = 6; ++ } else { ++ assert(rOop == j_rarg7, "wrong register"); ++ oop_slot = 7; + } -+ if (con >= BitsPerByte) con = BitsPerByte - 1; -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} + 
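++    // Turn the argument register's slot index into its position in the oop
++    // handle area and the matching byte offset from sp used below.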
-+instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVS src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerShort) con = BitsPerShort - 1; -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot * VMRegImpl::stack_slot_size; + -+instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVI src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ // Store oop in handle area, may be NULL ++ __ sd(rOop, Address(sp, offset)); ++ if (is_receiver) { ++ *receiver_offset = offset; + } -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ -+ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); -+ match(Set dst (RShiftVL src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrL_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++ //rOop maybe the same as rHandle ++ if (rOop == rHandle) { ++ Label isZero; ++ __ beqz(rOop, isZero); ++ __ la(rHandle, Address(sp, offset)); ++ __ bind(isZero); ++ } else { ++ Label notZero2; ++ __ la(rHandle, Address(sp, offset)); ++ __ bnez(rOop, notZero2); ++ __ mv(rHandle, zr); ++ __ bind(notZero2); + } -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ } + -+instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVB src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++ // If arg is on the stack then place it otherwise it is already in correct reg. 
++ if (dst.first()->is_stack()) { ++ __ sd(rHandle, Address(sp, reg2offset_out(dst.first()))); ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(src.first()->is_stack() && dst.first()->is_stack() || ++ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ lwu(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sw(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else if (dst.first()->is_Register()) { ++ __ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } else { ++ ShouldNotReachHere(); + } -+ if (con >= BitsPerByte) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++ } else if (src.first() != dst.first()) { ++ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { ++ __ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ ShouldNotReachHere(); + } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ } ++} + -+instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVS src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); + } -+ if (con >= BitsPerShort) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ __ mv(dst.first()->as_Register(), src.first()->as_Register()); + } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ } ++} + -+instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVI src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(src.first()->is_stack() && dst.first()->is_stack() || ++ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ 
ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else if (dst.first()-> is_Register()) { ++ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } else { ++ ShouldNotReachHere(); + } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ -+ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); -+ match(Set dst (URShiftVL src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++ } else if (src.first() != dst.first()) { ++ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { ++ __ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ ShouldNotReachHere(); + } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ } ++} + -+instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVB src (LShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con >= BitsPerByte) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ assert_cond(masm != NULL); ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ fsw(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_DOUBLE: ++ __ fsd(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_VOID: break; ++ default: { ++ __ sd(x10, Address(fp, -3 * wordSize)); + } -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ } ++} + -+instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVS src (LShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con >= BitsPerShort) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ assert_cond(masm != NULL); ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ flw(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_DOUBLE: ++ __ fld(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_VOID: break; ++ default: { ++ __ ld(x10, Address(fp, -3 * wordSize)); + } -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ } ++} + -+instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVI src (LShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ 
"vsll.vi $dst, $src, $shift\t#@vlslI_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ -+ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); -+ match(Set dst (LShiftVL src (LShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ assert_cond(masm != NULL && args != NULL); ++ RegSet x; ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ x = x + args[i].first()->as_Register(); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ addi(sp, sp, -2 * wordSize); ++ __ fsd(args[i].first()->as_FloatRegister(), Address(sp, 0)); ++ } ++ } ++ __ push_reg(x, sp); ++} + -+instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ assert_cond(masm != NULL && args != NULL); ++ RegSet x; ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ x = x + args[i].first()->as_Register(); ++ } else { ++ ; ++ } ++ } ++ __ pop_reg(x, sp); ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ ; ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ fld(args[i].first()->as_FloatRegister(), Address(sp, 0)); ++ __ add(sp, sp, 2 * wordSize); ++ } ++ } ++} + -+instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || -+ n->bottom_type()->is_vect()->element_basic_type() == T_CHAR); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static void rt_call(MacroAssembler* masm, address dest) { ++ assert_cond(masm != NULL); ++ CodeBlob *cb = CodeCache::find_blob(dest); ++ if (cb) { ++ __ far_call(RuntimeAddress(dest)); ++ } else { ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(dest), offset); ++ __ jalr(x1, t0, offset); ++ } ++} + -+instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static void verify_oop_args(MacroAssembler* masm, ++ const methodHandle& method, ++ const BasicType* sig_bt, 
++ const VMRegPair* regs) { ++ const Register temp_reg = x9; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} + -+instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static void gen_special_dispatch(MacroAssembler* masm, ++ const methodHandle& method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); + -+// vector sqrt ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = x9; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", iid); ++ } + -+instruct vsqrtF(vReg dst, vReg src) %{ -+ match(Set dst (SqrtVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. ++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } + -+instruct vsqrtD(vReg dst, vReg src) %{ -+ match(Set dst (SqrtVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. 
++ fatal("receiver always in a register"); ++ receiver_reg = x12; // known to be free at this point ++ __ ld(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } + -+// vector sub ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} + -+instruct vsubB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++// ++// Critical native functions are a shorthand for the use of ++// GetPrimtiveArrayCritical and disallow the use of any other JNI ++// functions. The wrapper is expected to unpack the arguments before ++// passing them to the callee and perform checks before and after the ++// native call to ensure that they GCLocker ++// lock_critical/unlock_critical semantics are followed. Some other ++// parts of JNI setup are skipped like the tear down of the JNI handle ++// block and the check for pending exceptions it's impossible for them ++// to be thrown. ++// ++// They are roughly structured like this: ++// if (GCLocker::needs_gc()) SharedRuntime::block_for_jni_critical() ++// tranistion to thread_in_native ++// unpack arrray arguments and call native entry point ++// check for safepoint in progress ++// check if any thread suspend flags are set ++// call into JVM and possible unlock the JNI critical ++// if a GC was suppressed while in the critical native. 
++// transition back to thread_in_Java ++// return to caller ++// ++nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; + -+instruct vsubS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // First instruction must be a nop as it may need to be patched on deoptimisation ++ __ nop(); ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ address native_func = method->native_function(); ++ assert(native_func != NULL, "must have function"); + -+instruct vsubI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // An OopMap for lock (and class if static) ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ intptr_t start = (intptr_t)__ pc(); + -+instruct vsubL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) + -+instruct vsubF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfsub.vv $dst, $src1, $src2\t@vsubF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args + (method->is_static() ? 
2 : 1); + -+instruct vsubD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; + -+instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, vReg_V1 v1, -+ vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) -+%{ -+ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); ++ int argc = 0; ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} -+ ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. -+ __ string_equals_v($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 1); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ for (int i = 0; i < total_in_args ; i++) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } + -+instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, vReg_V1 v1, -+ vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) -+%{ -+ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); ++ // Now figure out where the args must be stored and how much stack space ++ // they require. ++ int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} -+ ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. -+ __ string_equals_v($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 2); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // incoming registers + -+instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) -+%{ -+ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (AryEq ary1 ary2)); -+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); ++ // Calculate the total number of stack slots we will need. 
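++  // The total is accumulated step by step below: the outgoing C arguments,
++  // the oop handle area for the incoming argument registers, an optional
++  // klass slot (static methods), an optional lock box (synchronized methods),
++  // and a few slots for saved fp/ra and shuffle temporaries, rounded up to
++  // the stack alignment.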
+ -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} -+ ins_encode %{ -+ __ arrays_equals_v($ary1$$Register, $ary2$$Register, -+ $result$$Register, $tmp$$Register, 1); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + -+instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) -+%{ -+ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (AryEq ary1 ary2)); -+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} -+ ins_encode %{ -+ __ arrays_equals_v($ary1$$Register, $ary2$$Register, -+ $result$$Register, $tmp$$Register, 2); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; + -+instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++ // Now any space we need for handlizing a klass if static method + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} -+ ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::UU); -+ %} -+ ins_pipe(pipe_class_memory); -+%} -+instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} -+ ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::LL); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } + -+instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++ // Plus a lock if needed + -+ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} -+ ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::UL); -+ %} -+ ins_pipe(pipe_class_memory); -+%} -+instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} -+ ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::LU); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ // Now a place (+2) to save return values or temp during shuffling ++ // + 4 for return address (which we own) and saved fp ++ stack_slots += 6; + -+// fast byte[] to char[] inflation -+instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set dummy 
(StrInflatedCopy src (Binary dst len))); -+ effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // | 2 slots (ra) | ++ // | 2 slots (fp) | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset (8 java arg registers) ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // + -+ format %{ "String Inflate $src,$dst" %} -+ ins_encode %{ -+ __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); -+ %} -+ ins_pipe(pipe_class_memory); -+%} + -+// encode char[] to byte[] in ISO_8859_1 -+instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (EncodeISOArray src (Binary dst len))); -+ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -+ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = align_up(stack_slots, StackAlignmentInSlots); + -+ format %{ "Encode array $src,$dst,$len -> $result" %} -+ ins_encode %{ -+ __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register, -+ $result$$Register, $tmp$$Register); -+ %} -+ ins_pipe( pipe_class_memory ); -+%} ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; + -+// fast char[] to byte[] compression -+instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (StrCompressedCopy src (Binary dst len))); -+ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -+ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); ++ // First thing make an ic check to see if we should even be here + -+ format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %} -+ ins_encode %{ -+ __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register, -+ $result$$Register, $tmp$$Register); -+ %} -+ ins_pipe( pipe_slow ); -+%} ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. 
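++
++  // The inline cache check that follows compares the receiver's klass
++  // (receiver in j_rarg0) against the expected klass passed in ic_reg (t1)
++  // and jumps to the IC miss stub on a mismatch; execution falls through to
++  // the verified entry point only when they match.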
+ -+instruct vcount_positives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (CountPositives ary len)); -+ effect(USE_KILL ary, USE_KILL len, TEMP tmp); + -+ format %{ "count positives byte[] $ary, $len -> $result" %} -+ ins_encode %{ -+ __ count_positives_v($ary$$Register, $len$$Register, $result$$Register, $tmp$$Register); -+ %} ++ const Register ic_reg = t1; ++ const Register receiver = j_rarg0; + -+ ins_pipe(pipe_slow); -+%} ++ Label hit; ++ Label exception_pending; + -+instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) -+%{ -+ predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, -+ TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); ++ assert_different_registers(ic_reg, receiver, t0); ++ __ verify_oop(receiver); ++ __ cmp_klass(receiver, ic_reg, t0, hit); + -+ format %{ "StringUTF16 IndexOf char[] $str1, $cnt1, $ch -> $result" %} ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + -+ ins_encode %{ -+ __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ false /* isL */); -+ %} ++ // Verified entry point must be aligned ++ __ align(8); + -+ ins_pipe(pipe_class_memory); -+%} ++ __ bind(hit); + -+instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) -+%{ -+ predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, -+ TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); ++ int vep_offset = ((intptr_t)__ pc()) - start; + -+ format %{ "StringLatin1 IndexOf char[] $str1, $cnt1, $ch -> $result" %} ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. ++ __ nop(); + -+ ins_encode %{ -+ __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ true /* isL */); -+ %} ++ // Generate stack overflow check ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); + -+ ins_pipe(pipe_class_memory); -+%} ++ // Generate a new frame for the wrapper. ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ sub(sp, sp, stack_size - 2 * wordSize); + -+// clearing of an array -+instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, -+ vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3) -+%{ -+ predicate(UseRVV); -+ match(Set dummy (ClearArray cnt base)); -+ effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3); ++ // Frame is now completed as far as size and linkage. 
++ int frame_complete = ((intptr_t)__ pc()) - start; + -+ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} ++ // We use x18 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native + -+ ins_encode %{ -+ __ clear_array_v($base$$Register, $cnt$$Register); -+ %} ++ const Register oop_handle_reg = x18; + -+ ins_pipe(pipe_class_memory); -+%} -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -new file mode 100644 -index 00000000000..f85d4b25a76 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -0,0 +1,2761 @@ -+/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmti, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "code/debugInfoRec.hpp" -+#include "code/icBuffer.hpp" -+#include "code/vtableStubs.hpp" -+#include "compiler/oopMap.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "interpreter/interpreter.hpp" -+#include "logging/log.hpp" -+#include "memory/resourceArea.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/compiledICHolder.hpp" -+#include "oops/klass.inline.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/jniHandles.hpp" -+#include "runtime/safepointMechanism.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/signature.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/vframeArray.hpp" -+#include "utilities/align.hpp" -+#include "utilities/formatBuffer.hpp" -+#include "vmreg_riscv.inline.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_Runtime1.hpp" -+#endif -+#ifdef COMPILER2 -+#include "adfiles/ad_riscv.hpp" -+#include "opto/runtime.hpp" -+#endif ++ // ----------------- ++ // The Grand Shuffle + -+#define __ masm-> ++ // The Java calling convention is either equal (linux) or denser (win64) than the ++ // c calling convention. However the because of the jni_env argument the c calling ++ // convention always has at least one more (and two for static) arguments than Java. 
++ // Therefore if we move the args from java -> c backwards then we will never have ++ // a register->register conflict and we don't have to build a dependency graph ++ // and figure out how to break any cycles. ++ // + -+const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; -+ -+class SimpleRuntimeFrame { -+public: ++ // Record esp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; + -+ // Most of the runtime stubs have this simple frame layout. -+ // This class exists to make the layout shared in one place. -+ // Offsets are for compiler stack slots, which are jints. -+ enum layout { -+ // The frame sender code expects that fp will be in the "natural" place and -+ // will override any oopMap setting for it. We must therefore force the layout -+ // so that it agrees with the frame sender code. -+ // we don't expect any arg reg save area so riscv asserts that -+ // frame::arg_reg_save_area_bytes == 0 -+ fp_off = 0, fp_off2, -+ return_off, return_off2, -+ framesize -+ }; -+}; ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ assert_cond(map != NULL); + -+class RegisterSaver { -+ const bool _save_vectors; -+ public: -+ RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {} -+ ~RegisterSaver() {} -+ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); -+ void restore_live_registers(MacroAssembler* masm); ++ int float_args = 0; ++ int int_args = 0; + -+ // Offsets into the register save area -+ // Used by deoptimization when it is managing result register -+ // values on its own -+ // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) -+ // |---v0---|<---SP -+ // |---v1---|save vectors only in generate_handler_blob -+ // |-- .. --| -+ // |---v31--|----- -+ // |---f0---| -+ // |---f1---| -+ // | .. | -+ // |---f31--| -+ // |---reserved slot for stack alignment---| -+ // |---x5---| -+ // | x6 | -+ // |---.. 
--| -+ // |---x31--| -+ // |---fp---| -+ // |---ra---| -+ int v0_offset_in_bytes(void) { return 0; } -+ int f0_offset_in_bytes(void) { -+ int f0_offset = 0; -+#ifdef COMPILER2 -+ if (_save_vectors) { -+ f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers * -+ BytesPerInt; -+ } -+#endif -+ return f0_offset; ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; + } -+ int reserved_slot_offset_in_bytes(void) { -+ return f0_offset_in_bytes() + -+ FloatRegisterImpl::max_slots_per_register * -+ FloatRegisterImpl::number_of_registers * -+ BytesPerInt; ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; + } + -+ int reg_offset_in_bytes(Register r) { -+ assert (r->encoding() > 4, "ra, sp, gp and tp not saved"); -+ return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize; -+ } ++#endif /* ASSERT */ + -+ int freg_offset_in_bytes(FloatRegister f) { -+ return f0_offset_in_bytes() + f->encoding() * wordSize; -+ } ++ // For JNI natives the incoming and outgoing registers are offset upwards. ++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(x9->as_VMReg()); + -+ int ra_offset_in_bytes(void) { -+ return reserved_slot_offset_in_bytes() + -+ (RegisterImpl::number_of_registers - 3) * -+ RegisterImpl::max_slots_per_register * -+ BytesPerInt; ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); + } -+}; + -+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { -+ int vector_size_in_bytes = 0; -+ int vector_size_in_slots = 0; -+#ifdef COMPILER2 -+ if (_save_vectors) { -+ vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE); -+ vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT); -+ } -+#endif ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("mv %d -> %d", i, c_arg)); ++ assert(c_arg != -1 && i != -1, "wrong order"); ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ int_args++; ++ break; ++ case T_VOID: ++ break; + -+ assert_cond(masm != NULL && total_frame_words != NULL); -+ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); -+ // OopMap frame size is in compiler stack slots (jint's) not bytes or words -+ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; -+ // 
The caller will allocate additional_frame_words -+ int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt; -+ // CodeBlob frame size is in words. -+ int frame_size_in_words = frame_size_in_bytes / wordSize; -+ *total_frame_words = frame_size_in_words; ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ float_args++; ++ break; + -+ // Save Integer, Float and Vector registers. -+ __ enter(); -+ __ push_CPU_state(_save_vectors, vector_size_in_bytes); ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg + 1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ float_args++; ++ break; + -+ // Set an oopmap for the call site. This oopmap will map all -+ // oop-registers and debug-info registers as callee-saved. This -+ // will allow deoptimization at this safepoint to find all possible -+ // debug-info recordings, as well as let GC find all oops. ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ int_args++; ++ break; + -+ OopMapSet *oop_maps = new OopMapSet(); -+ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); -+ assert_cond(oop_maps != NULL && oop_map != NULL); ++ case T_ADDRESS: ++ assert(false, "found T_ADDRESS in java args"); ++ break; + -+ int sp_offset_in_slots = 0; -+ int step_in_slots = 0; -+ if (_save_vectors) { -+ step_in_slots = vector_size_in_slots; -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -+ VectorRegister r = as_VectorRegister(i); -+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); ++ default: ++ move32_64(masm, in_regs[i], out_regs[c_arg]); ++ int_args++; + } + } + -+ step_in_slots = FloatRegisterImpl::max_slots_per_register; -+ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -+ FloatRegister r = as_FloatRegister(i); -+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ int c_arg = total_c_args - total_in_args; ++ ++ // Pre-load a static method's oop into c_rarg1. ++ if (method->is_static()) { ++ ++ // load oop into a register ++ __ movoop(c_rarg1, ++ JNIHandles::make_local(method->method_holder()->java_mirror()), ++ /*immediate*/true); ++ ++ // Now handlize the static class mirror it's known not-null. ++ __ sd(c_rarg1, Address(sp, klass_offset)); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ la(c_rarg1, Address(sp, klass_offset)); ++ // and protect the arg if we must spill ++ c_arg--; + } + -+ step_in_slots = RegisterImpl::max_slots_per_register; -+ // skip the slot reserved for alignment, see MacroAssembler::push_reg; -+ // also skip x5 ~ x6 on the stack because they are caller-saved registers. -+ sp_offset_in_slots += RegisterImpl::max_slots_per_register * 3; -+ // besides, we ignore x0 ~ x4 because push_CPU_state won't push them on the stack. -+ for (int i = 7; i < RegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -+ Register r = as_Register(i); -+ if (r != xthread) { -+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots + additional_frame_slots), r->as_VMReg()); -+ } ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a stack traversal). 
++ // We use the same pc/oopMap repeatedly when we call out ++ ++ Label native_return; ++ __ set_last_Java_frame(sp, noreg, native_return, t0); ++ ++ Label dtrace_method_entry, dtrace_method_entry_done; ++ { ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); ++ __ lbu(t0, Address(t0, offset)); ++ __ addw(t0, t0, zr); ++ __ bnez(t0, dtrace_method_entry); ++ __ bind(dtrace_method_entry_done); + } + -+ return oop_map; -+} ++ // RedefineClasses() tracing support for obsolete method entry ++ if (log_is_enabled(Trace, redefine, class, obsolete)) { ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), ++ xthread, c_rarg1); ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } + -+void RegisterSaver::restore_live_registers(MacroAssembler* masm) { -+ assert_cond(masm != NULL); -+#ifdef COMPILER2 -+ __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE)); -+#else -+ __ pop_CPU_state(_save_vectors); -+#endif -+ __ leave(); -+} ++ // Lock a synchronized method + -+// Is vector's size (in bytes) bigger than a size saved by default? -+// riscv does not ovlerlay the floating-point registers on vector registers like aarch64. -+bool SharedRuntime::is_wide_vector(int size) { -+ return UseRVV; -+} ++ // Register definitions used by locking and unlocking + -+// The java_calling_convention describes stack locations as ideal slots on -+// a frame with no abi restrictions. Since we must observe abi restrictions -+// (like the placement of the register window) the slots must be biased by -+// the following value. -+static int reg2offset_in(VMReg r) { -+ // Account for saved fp and ra -+ // This should really be in_preserve_stack_slots -+ return r->reg2stack() * VMRegImpl::stack_slot_size; -+} ++ const Register swap_reg = x10; ++ const Register obj_reg = x9; // Will contain the oop ++ const Register lock_reg = x30; // Address of compiler lock object (BasicLock) ++ const Register old_hdr = x30; // value of old header at unlock time ++ const Register tmp = ra; + -+static int reg2offset_out(VMReg r) { -+ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; -+} ++ Label slow_path_lock; ++ Label lock_done; + -+// --------------------------------------------------------------------------- -+// Read the array of BasicTypes from a signature, and compute where the -+// arguments should go. Values in the VMRegPair regs array refer to 4-byte -+// quantities. Values less than VMRegImpl::stack0 are registers, those above -+// refer to 4-byte stack slots. All stack slots are based off of the stack pointer -+// as framesizes are fixed. -+// VMRegImpl::stack0 refers to the first slot 0(sp). -+// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register -+// up to RegisterImpl::number_of_registers) are the 64-bit -+// integer registers. ++ if (method->is_synchronized()) { + -+// Note: the INPUTS in sig_bt are in units of Java argument words, -+// which are 64-bit. The OUTPUTS are in 32-bit units. ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + -+// The Java calling convention is a "shifted" version of the C ABI. -+// By skipping the first C ABI register we can call non-static jni -+// methods with small numbers of arguments without having to shuffle -+// the arguments at all. 
Since we control the java ABI we ought to at -+// least get some advantage out of it. ++ // Get the handle (the 2nd argument) ++ __ mv(oop_handle_reg, c_rarg1); + -+int SharedRuntime::java_calling_convention(const BasicType *sig_bt, -+ VMRegPair *regs, -+ int total_args_passed) { -+ // Create the mapping between argument positions and -+ // registers. -+ static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { -+ j_rarg0, j_rarg1, j_rarg2, j_rarg3, -+ j_rarg4, j_rarg5, j_rarg6, j_rarg7 -+ }; -+ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { -+ j_farg0, j_farg1, j_farg2, j_farg3, -+ j_farg4, j_farg5, j_farg6, j_farg7 -+ }; ++ // Get address of the box + -+ uint int_args = 0; -+ uint fp_args = 0; -+ uint stk_args = 0; // inc by 2 each time ++ __ la(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + -+ for (int i = 0; i < total_args_passed; i++) { -+ switch (sig_bt[i]) { -+ case T_BOOLEAN: // fall through -+ case T_CHAR: // fall through -+ case T_BYTE: // fall through -+ case T_SHORT: // fall through -+ case T_INT: -+ if (int_args < Argument::n_int_register_parameters_j) { -+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_VOID: -+ // halves of T_LONG or T_DOUBLE -+ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); -+ regs[i].set_bad(); -+ break; -+ case T_LONG: // fall through -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ case T_OBJECT: // fall through -+ case T_ARRAY: // fall through -+ case T_ADDRESS: -+ if (int_args < Argument::n_int_register_parameters_j) { -+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_FLOAT: -+ if (fp_args < Argument::n_float_register_parameters_j) { -+ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_DOUBLE: -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ if (fp_args < Argument::n_float_register_parameters_j) { -+ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else { -+ regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ default: -+ ShouldNotReachHere(); ++ // Load the oop from the handle ++ __ ld(obj_reg, Address(oop_handle_reg, 0)); ++ ++ if (UseBiasedLocking) { ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); + } -+ } + -+ return align_up(stk_args, 2); -+} ++ // Load (object->mark() | 1) into swap_reg % x10 ++ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ __ ori(swap_reg, t0, 1); + -+// Patch the callers callsite with entry to compiled code if it exists. 
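The removed java_calling_convention above hands out up to eight integer argument registers (j_rarg0..j_rarg7) and eight floating-point argument registers (j_farg0..j_farg7), and spills anything beyond that to 4-byte stack slots in aligned pairs, with the second half of a long/double (the trailing T_VOID) getting no location at all. The standalone sketch below models that walk so the register/stack split is easier to follow; it is an illustration only, and the enum, struct and function names are invented here rather than HotSpot types.

    // Illustrative model of the java_calling_convention walk above (not HotSpot code).
    // "index" is a register number for the first eight int/fp args, or a 4-byte stack
    // slot (always handed out in pairs) once the registers run out.
    #include <cstdio>

    enum BT { B_INT, B_LONG, B_FLOAT, B_DOUBLE, B_OBJECT, B_VOID };

    struct Slot { bool on_stack; int index; };

    static int assign(const BT* sig, int n, Slot* out) {
      int int_args = 0, fp_args = 0, stk = 0;      // stk counts 4-byte slots, bumped by 2
      for (int i = 0; i < n; i++) {
        switch (sig[i]) {
          case B_VOID:                             // unused second half of a long/double
            out[i] = { false, -1 };
            break;
          case B_FLOAT:
          case B_DOUBLE:
            if (fp_args < 8) { out[i] = { false, fp_args++ }; }
            else             { out[i] = { true, stk }; stk += 2; }
            break;
          default:                                 // int-like, long, oop: integer registers
            if (int_args < 8) { out[i] = { false, int_args++ }; }
            else              { out[i] = { true, stk }; stk += 2; }
            break;
        }
      }
      return (stk + 1) & ~1;                       // align_up(stk_args, 2), as returned above
    }

    int main() {
      // (long, double, int): the long takes the first int register, the double the first
      // fp register, and the int the second int register; nothing spills to the stack.
      BT sig[] = { B_LONG, B_VOID, B_DOUBLE, B_VOID, B_INT };
      Slot out[5];
      printf("stack slots needed: %d\n", assign(sig, 5, out));
    }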
-+static void patch_callers_callsite(MacroAssembler *masm) { -+ assert_cond(masm != NULL); -+ Label L; -+ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); -+ __ beqz(t0, L); ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); + -+ __ enter(); -+ __ push_CPU_state(); ++ // src -> dest if dest == x10 else x10 <- dest ++ { ++ Label here; ++ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); ++ } + -+ // VM needs caller's callsite -+ // VM needs target method -+ // This needs to be a long call since we will relocate this adapter to -+ // the codeBuffer and it may not reach ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg + -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif ++ __ sub(swap_reg, swap_reg, sp); ++ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); + -+ __ mv(c_rarg0, xmethod); -+ __ mv(c_rarg1, ra); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); -+ __ jalr(x1, t0, offset); ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++ __ bnez(swap_reg, slow_path_lock); + -+ // Explicit fence.i required because fixup_callers_callsite may change the code -+ // stream. -+ __ safepoint_ifence(); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ } + -+ __ pop_CPU_state(); -+ // restore sp -+ __ leave(); -+ __ bind(L); -+} + -+static void gen_c2i_adapter(MacroAssembler *masm, -+ int total_args_passed, -+ int comp_args_on_stack, -+ const BasicType *sig_bt, -+ const VMRegPair *regs, -+ Label& skip_fixup) { -+ // Before we get into the guts of the C2I adapter, see if we should be here -+ // at all. We've come from compiled code and are attempting to jump to the -+ // interpreter, which means the caller made a static call to get here -+ // (vcalls always get a compiled target if there is one). Check for a -+ // compiled target. If there is one, we need to patch the caller's call. -+ patch_callers_callsite(masm); ++ // Finally just about ready to make the JNI call + -+ __ bind(skip_fixup); ++ // get JNIEnv* which is first argument to native ++ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); + -+ int words_pushed = 0; ++ // Now set thread in native ++ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); ++ __ mv(t0, _thread_in_native); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sw(t0, Address(t1)); + -+ // Since all args are passed on the stack, total_args_passed * -+ // Interpreter::stackElementSize is the space we need. ++ rt_call(masm, native_func); + -+ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ __ bind(native_return); + -+ __ mv(x30, sp); ++ intptr_t return_pc = (intptr_t) __ pc(); ++ oop_maps->add_gc_map(return_pc - start, map); + -+ // stack is aligned, keep it that way -+ extraspace = align_up(extraspace, 2 * wordSize); ++ // Unpack native results. 
++ if (ret_type != T_OBJECT && ret_type != T_ARRAY) { ++ __ cast_primitive_type(ret_type, x10); ++ } + -+ if (extraspace) { -+ __ sub(sp, sp, extraspace); ++ Label safepoint_in_progress, safepoint_in_progress_done; ++ Label after_transition; ++ ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ mv(t0, _thread_in_native_trans); ++ ++ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++ ++ // Force this write out before the read below ++ __ membar(MacroAssembler::AnyAny); ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ __ safepoint_poll_acquire(safepoint_in_progress); ++ __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); ++ __ bnez(t0, safepoint_in_progress); ++ __ bind(safepoint_in_progress_done); + } + -+ // Now write the args into the outgoing interpreter space -+ for (int i = 0; i < total_args_passed; i++) { -+ if (sig_bt[i] == T_VOID) { -+ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); -+ continue; -+ } ++ // change thread state ++ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); ++ __ mv(t0, _thread_in_Java); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sw(t0, Address(t1)); ++ __ bind(after_transition); + -+ // offset to start parameters -+ int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; -+ int next_off = st_off - Interpreter::stackElementSize; ++ Label reguard; ++ Label reguard_done; ++ __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); ++ __ mv(t1, JavaThread::stack_guard_yellow_reserved_disabled); ++ __ beq(t0, t1, reguard); ++ __ bind(reguard_done); + -+ // Say 4 args: -+ // i st_off -+ // 0 32 T_LONG -+ // 1 24 T_VOID -+ // 2 16 T_OBJECT -+ // 3 8 T_BOOL -+ // - 0 return address -+ // -+ // However to make thing extra confusing. Because we can fit a Java long/double in -+ // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter -+ // leaves one slot empty and only stores to a single slot. In this case the -+ // slot that is occupied is the T_VOID slot. See I said it was confusing. 
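The displaced-header test in the synchronized-method locking sequence earlier collapses "(mark & 3) == 0 and sp <= mark < sp + page size" into the single expression ((mark - sp) & (3 - os::vm_page_size())). A quick arithmetic check of that mask, with 4096 standing in for os::vm_page_size(), follows; it is an illustration only, not HotSpot code.

    // Quick check of the stack-lock test used by the locking sequence above.
    // The expression is zero exactly when the mark word is 4-byte aligned and lies
    // within the current stack page, i.e. when the lock is a recursive stack lock.
    #include <cstdint>
    #include <cstdio>

    int main() {
      const intptr_t page = 4096;                 // stand-in for os::vm_page_size()
      const intptr_t sp   = 0x7ffff000;           // pretend stack pointer, page aligned
      const intptr_t marks[] = {
        sp + 0x40,                                // lock word on this page: recursive, zero
        sp + 0x41,                                // low bits set: not a displaced header
        sp + 2 * page                             // outside the page: owned elsewhere
      };
      for (intptr_t mark : marks) {
        intptr_t r = (mark - sp) & (3 - page);    // the expression the assembly evaluates
        printf("mark - sp = %#lx -> %s\n", (long)(mark - sp),
               r == 0 ? "recursive (stay on fast path)" : "non-zero (slow_path_lock)");
      }
    }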
++ // native result if any is live + -+ VMReg r_1 = regs[i].first(); -+ VMReg r_2 = regs[i].second(); -+ if (!r_1->is_valid()) { -+ assert(!r_2->is_valid(), ""); -+ continue; -+ } -+ if (r_1->is_stack()) { -+ // memory to memory use t0 -+ int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size -+ + extraspace -+ + words_pushed * wordSize); -+ if (!r_2->is_valid()) { -+ __ lwu(t0, Address(sp, ld_off)); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+ } else { -+ __ ld(t0, Address(sp, ld_off), /*temp register*/esp); ++ // Unlock ++ Label unlock_done; ++ Label slow_path_unlock; ++ if (method->is_synchronized()) { + -+ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG -+ // T_DOUBLE and T_LONG use two slots in the interpreter -+ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { -+ // ld_off == LSW, ld_off+wordSize == MSW -+ // st_off == MSW, next_off == LSW -+ __ sd(t0, Address(sp, next_off), /*temp register*/esp); -+#ifdef ASSERT -+ // Overwrite the unused slot with known junk -+ __ li(t0, 0xdeadffffdeadaaaaul); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+#endif /* ASSERT */ -+ } else { -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+ } -+ } -+ } else if (r_1->is_Register()) { -+ Register r = r_1->as_Register(); -+ if (!r_2->is_valid()) { -+ // must be only an int (or less ) so move only 32bits to slot -+ __ sd(r, Address(sp, st_off)); -+ } else { -+ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG -+ // T_DOUBLE and T_LONG use two slots in the interpreter -+ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { -+ // long/double in gpr -+#ifdef ASSERT -+ // Overwrite the unused slot with known junk -+ __ li(t0, 0xdeadffffdeadaaabul); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+#endif /* ASSERT */ -+ __ sd(r, Address(sp, next_off)); -+ } else { -+ __ sd(r, Address(sp, st_off)); -+ } -+ } -+ } else { -+ assert(r_1->is_FloatRegister(), ""); -+ if (!r_2->is_valid()) { -+ // only a float use just part of the slot -+ __ fsw(r_1->as_FloatRegister(), Address(sp, st_off)); -+ } else { -+#ifdef ASSERT -+ // Overwrite the unused slot with known junk -+ __ li(t0, 0xdeadffffdeadaaacul); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+#endif /* ASSERT */ -+ __ fsd(r_1->as_FloatRegister(), Address(sp, next_off)); -+ } -+ } -+ } -+ -+ __ mv(esp, sp); // Interp expects args on caller's expression stack ++ // Get locked oop from the handle we passed to jni ++ __ ld(obj_reg, Address(oop_handle_reg, 0)); + -+ __ ld(t0, Address(xmethod, in_bytes(Method::interpreter_entry_offset()))); -+ __ jr(t0); -+} ++ Label done; + -+void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, -+ int total_args_passed, -+ int comp_args_on_stack, -+ const BasicType *sig_bt, -+ const VMRegPair *regs) { -+ // Cut-out for having no stack args. -+ int comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord; -+ if (comp_args_on_stack != 0) { -+ __ sub(t0, sp, comp_words_on_stack * wordSize); -+ __ andi(sp, t0, -16); -+ } ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, old_hdr, done); ++ } + -+ // Will jump to the compiled code just as if compiled code was doing it. -+ // Pre-load the register-jump target early, to schedule it better. -+ __ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_offset()))); ++ // Simple recursive lock? 
++ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ __ beqz(t0, done); + -+ // Now generate the shuffle code. -+ for (int i = 0; i < total_args_passed; i++) { -+ if (sig_bt[i] == T_VOID) { -+ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); -+ continue; ++ // Must save x10 if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); + } + -+ // Pick up 0, 1 or 2 words from SP+offset. ++ // get address of the stack lock ++ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ // get old displaced header ++ __ ld(old_hdr, Address(x10, 0)); + -+ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), -+ "scrambled load targets?"); -+ // Load in argument order going down. -+ int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; -+ // Point to interpreter value (vs. tag) -+ int next_off = ld_off - Interpreter::stackElementSize; ++ // Atomic swap old header if oop still contains the stack lock ++ Label succeed; ++ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); ++ __ bind(succeed); + -+ VMReg r_1 = regs[i].first(); -+ VMReg r_2 = regs[i].second(); -+ if (!r_1->is_valid()) { -+ assert(!r_2->is_valid(), ""); -+ continue; ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); + } -+ if (r_1->is_stack()) { -+ // Convert stack slot to an SP offset (+ wordSize to account for return address ) -+ int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size; -+ if (!r_2->is_valid()) { -+ __ lw(t0, Address(esp, ld_off)); -+ __ sd(t0, Address(sp, st_off), /*temp register*/t2); -+ } else { -+ // -+ // We are using two optoregs. This can be either T_OBJECT, -+ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates -+ // two slots but only uses one for thr T_LONG or T_DOUBLE case -+ // So we must adjust where to pick up the data to match the -+ // interpreter. -+ // -+ // Interpreter local[n] == MSW, local[n+1] == LSW however locals -+ // are accessed as negative so LSW is at LOW address -+ -+ // ld_off is MSW so get LSW -+ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? -+ next_off : ld_off; -+ __ ld(t0, Address(esp, offset)); -+ // st_off is LSW (i.e. reg.first()) -+ __ sd(t0, Address(sp, st_off), /*temp register*/t2); -+ } -+ } else if (r_1->is_Register()) { // Register argument -+ Register r = r_1->as_Register(); -+ if (r_2->is_valid()) { -+ // -+ // We are using two VMRegs. This can be either T_OBJECT, -+ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates -+ // two slots but only uses one for thr T_LONG or T_DOUBLE case -+ // So we must adjust where to pick up the data to match the -+ // interpreter. + -+ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? -+ next_off : ld_off; ++ __ bind(done); ++ } + -+ // this can be a misaligned move -+ __ ld(r, Address(esp, offset)); -+ } else { -+ // sign extend and use a full word? 
-+ __ lw(r, Address(esp, ld_off)); -+ } -+ } else { -+ if (!r_2->is_valid()) { -+ __ flw(r_1->as_FloatRegister(), Address(esp, ld_off)); -+ } else { -+ __ fld(r_1->as_FloatRegister(), Address(esp, next_off)); -+ } -+ } ++ Label dtrace_method_exit, dtrace_method_exit_done; ++ { ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); ++ __ lbu(t0, Address(t0, offset)); ++ __ bnez(t0, dtrace_method_exit); ++ __ bind(dtrace_method_exit_done); + } + -+ // 6243940 We might end up in handle_wrong_method if -+ // the callee is deoptimized as we race thru here. If that -+ // happens we don't want to take a safepoint because the -+ // caller frame will look interpreted and arguments are now -+ // "compiled" so it is much better to make this transition -+ // invisible to the stack walking code. Unfortunately if -+ // we try and find the callee by normal means a safepoint -+ // is possible. So we stash the desired callee in the thread -+ // and the vm will find there should this case occur. ++ __ reset_last_Java_frame(false); + -+ __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset())); ++ // Unbox oop result, e.g. JNIHandles::resolve result. ++ if (is_reference_type(ret_type)) { ++ __ resolve_jobject(x10, xthread, t1); ++ } + -+ __ jr(t1); -+} ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); ++ } + -+// --------------------------------------------------------------- -+AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, -+ int total_args_passed, -+ int comp_args_on_stack, -+ const BasicType *sig_bt, -+ const VMRegPair *regs, -+ AdapterFingerPrint* fingerprint) { -+ address i2c_entry = __ pc(); -+ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ // reset handle block ++ __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); ++ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); + -+ address c2i_unverified_entry = __ pc(); -+ Label skip_fixup; ++ __ leave(); + -+ Label ok; ++ // Any exception pending? ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ bnez(t0, exception_pending); + -+ const Register holder = t1; -+ const Register receiver = j_rarg0; -+ const Register tmp = t2; // A call-clobbered register not used for arg passing ++ // We're done ++ __ ret(); + -+ // ------------------------------------------------------------------------- -+ // Generate a C2I adapter. On entry we know xmethod holds the Method* during calls -+ // to the interpreter. The args start out packed in the compiled layout. They -+ // need to be unpacked into the interpreter layout. This will almost always -+ // require some stack space. We grow the current (compiled) stack, then repack -+ // the args. We finally end in a jump to the generic interpreter entry point. -+ // On exit from the interpreter, the interpreter will restore our SP (lest the -+ // compiled code, which relys solely on SP and not FP, get sick). 
++ // Unexpected paths are out of line and go here + -+ { -+ __ block_comment("c2i_unverified_entry {"); -+ __ load_klass(t0, receiver); -+ __ ld(tmp, Address(holder, CompiledICHolder::holder_klass_offset())); -+ __ ld(xmethod, Address(holder, CompiledICHolder::holder_metadata_offset())); -+ __ beq(t0, tmp, ok); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ // forward the exception ++ __ bind(exception_pending); + -+ __ bind(ok); -+ // Method might have been compiled since the call site was patched to -+ // interpreted; if that is the case treat it as a miss so we can get -+ // the call site corrected. -+ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); -+ __ beqz(t0, skip_fixup); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); -+ __ block_comment("} c2i_unverified_entry"); -+ } ++ // and forward the exception ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + -+ address c2i_entry = __ pc(); ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { + -+ // Class initialization barrier for static methods -+ address c2i_no_clinit_check_entry = NULL; -+ if (VM_Version::supports_fast_class_init_checks()) { -+ Label L_skip_barrier; ++ __ block_comment("Slow path lock {"); ++ __ bind(slow_path_lock); + -+ { // Bypass the barrier for non-static methods -+ __ lwu(t0, Address(xmethod, Method::access_flags_offset())); -+ __ andi(t1, t0, JVM_ACC_STATIC); -+ __ beqz(t1, L_skip_barrier); // non-static -+ } ++ // has last_Java_frame setup. No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) + -+ __ load_method_holder(t1, xmethod); -+ __ clinit_barrier(t1, t0, &L_skip_barrier); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); + -+ __ bind(L_skip_barrier); -+ c2i_no_clinit_check_entry = __ pc(); -+ } ++ __ mv(c_rarg0, obj_reg); ++ __ mv(c_rarg1, lock_reg); ++ __ mv(c_rarg2, xthread); + -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->c2i_entry_barrier(masm); ++ // Not a leaf but we have last_Java_frame setup as we want ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); ++ restore_args(masm, total_c_args, c_arg, out_regs); + -+ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++#ifdef ASSERT ++ { Label L; ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ beqz(t0, L); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ j(lock_done); + -+ __ flush(); -+ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); -+} ++ __ block_comment("} Slow path lock"); + -+int SharedRuntime::vector_calling_convention(VMRegPair *regs, -+ uint num_bits, -+ uint total_args_passed) { -+ Unimplemented(); -+ return 0; -+} ++ __ block_comment("Slow path unlock {"); ++ __ bind(slow_path_unlock); + -+int SharedRuntime::c_calling_convention(const BasicType *sig_bt, -+ VMRegPair *regs, -+ VMRegPair *regs2, -+ int total_args_passed) { -+ assert(regs2 == NULL, "not needed on riscv"); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { ++ save_native_result(masm, ret_type, stack_slots); ++ } + -+ // We return the amount of VMRegImpl stack slots we need to reserve for all -+ // the arguments NOT counting 
out_preserve_stack_slots. ++ __ mv(c_rarg2, xthread); ++ __ la(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ __ mv(c_rarg0, obj_reg); + -+ static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { -+ c_rarg0, c_rarg1, c_rarg2, c_rarg3, -+ c_rarg4, c_rarg5, c_rarg6, c_rarg7 -+ }; -+ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { -+ c_farg0, c_farg1, c_farg2, c_farg3, -+ c_farg4, c_farg5, c_farg6, c_farg7 -+ }; ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ // NOTE that obj_reg == x9 currently ++ __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + -+ uint int_args = 0; -+ uint fp_args = 0; -+ uint stk_args = 0; // inc by 2 each time ++ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); + -+ for (int i = 0; i < total_args_passed; i++) { -+ switch (sig_bt[i]) { -+ case T_BOOLEAN: // fall through -+ case T_CHAR: // fall through -+ case T_BYTE: // fall through -+ case T_SHORT: // fall through -+ case T_INT: -+ if (int_args < Argument::n_int_register_parameters_c) { -+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_LONG: // fall through -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ case T_OBJECT: // fall through -+ case T_ARRAY: // fall through -+ case T_ADDRESS: // fall through -+ case T_METADATA: -+ if (int_args < Argument::n_int_register_parameters_c) { -+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_FLOAT: -+ if (fp_args < Argument::n_float_register_parameters_c) { -+ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else if (int_args < Argument::n_int_register_parameters_c) { -+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_DOUBLE: -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ if (fp_args < Argument::n_float_register_parameters_c) { -+ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else if (int_args < Argument::n_int_register_parameters_c) { -+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_VOID: // Halves of longs and doubles -+ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); -+ regs[i].set_bad(); -+ break; -+ default: -+ ShouldNotReachHere(); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ beqz(t0, L); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); + } -+ } ++#endif /* ASSERT */ + -+ return stk_args; -+} ++ __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + -+// On 64 bit we will store integer like items to the stack as -+// 64 bits items (riscv64 abi) even though java would only store -+// 32bits for a parameter. 
On 32bit it will simply be 32 bits -+// So this routine will do 32->32 on 32bit and 32->64 on 64bit -+static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { -+ assert_cond(masm != NULL); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ // stack to stack -+ __ ld(t0, Address(fp, reg2offset_in(src.first()))); -+ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ // stack to reg -+ __ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } -+ } else if (dst.first()->is_stack()) { -+ // reg to stack -+ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ if (dst.first() != src.first()) { -+ // 32bits extend sign -+ __ addw(dst.first()->as_Register(), src.first()->as_Register(), zr); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { ++ restore_native_result(masm, ret_type, stack_slots); + } -+ } -+} -+ -+// An oop arg. Must pass a handle not the oop itself -+static void object_move(MacroAssembler* masm, -+ OopMap* map, -+ int oop_handle_offset, -+ int framesize_in_slots, -+ VMRegPair src, -+ VMRegPair dst, -+ bool is_receiver, -+ int* receiver_offset) { -+ assert_cond(masm != NULL && map != NULL && receiver_offset != NULL); -+ // must pass a handle. First figure out the location we use as a handle -+ Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register(); -+ -+ // See if oop is NULL if it is we need no handle ++ __ j(unlock_done); + -+ if (src.first()->is_stack()) { ++ __ block_comment("} Slow path unlock"); + -+ // Oop is already on the stack as an argument -+ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); -+ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); -+ if (is_receiver) { -+ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; -+ } ++ } // synchronized + -+ __ ld(t0, Address(fp, reg2offset_in(src.first()))); -+ __ la(rHandle, Address(fp, reg2offset_in(src.first()))); -+ // conditionally move a NULL -+ Label notZero1; -+ __ bnez(t0, notZero1); -+ __ mv(rHandle, zr); -+ __ bind(notZero1); -+ } else { ++ // SLOW PATH Reguard the stack if needed + -+ // Oop is in an a register we must store it to the space we reserve -+ // on the stack for oop_handles and pass a handle if oop is non-NULL ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); ++ restore_native_result(masm, ret_type, stack_slots); ++ // and continue ++ __ j(reguard_done); + -+ const Register rOop = src.first()->as_Register(); -+ int oop_slot = -1; -+ if (rOop == j_rarg0) { -+ oop_slot = 0; -+ } else if (rOop == j_rarg1) { -+ oop_slot = 1; -+ } else if (rOop == j_rarg2) { -+ oop_slot = 2; -+ } else if (rOop == j_rarg3) { -+ oop_slot = 3; -+ } else if (rOop == j_rarg4) { -+ oop_slot = 4; -+ } else if (rOop == j_rarg5) { -+ oop_slot = 5; -+ } else if (rOop == j_rarg6) { -+ oop_slot = 6; -+ } else { -+ assert(rOop == j_rarg7, "wrong register"); -+ oop_slot = 7; -+ } ++ // SLOW PATH safepoint ++ { ++ __ block_comment("safepoint {"); ++ __ bind(safepoint_in_progress); + -+ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; -+ int offset = oop_slot * VMRegImpl::stack_slot_size; ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. 
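object_move above never hands a raw oop to the native callee: the oop is stored into a reserved, GC-visible stack slot and the callee receives the address of that slot, or a NULL pointer when the oop itself is NULL. A minimal model of that handlization rule is sketched below; the typedefs and helper are invented for the sketch and are not HotSpot code.

    // Minimal model of the JNI handlization done by object_move above.
    // The callee only ever sees &slot (a handle), never the raw oop, and a NULL
    // oop becomes a NULL handle rather than a handle to a slot containing NULL.
    #include <cstdio>

    typedef void* oop;        // stand-in for a raw Java object pointer
    typedef oop*  handle_t;   // stand-in for the jobject the native code receives

    static handle_t handlize(oop value, oop* slot_in_frame) {
      *slot_in_frame = value;                        // recorded in the frame's oop map
      return value != nullptr ? slot_in_frame : nullptr;
    }

    int main() {
      oop slot = nullptr;
      int obj = 42;                                  // pretend object
      handle_t h1 = handlize((oop)&obj, &slot);      // non-NULL oop -> handle to the slot
      handle_t h2 = handlize(nullptr,  &slot);       // NULL oop     -> NULL handle
      printf("h1 %s NULL, h2 %s NULL\n", h1 ? "!=" : "==", h2 ? "!=" : "==");
    }

The return path applies the inverse of this contract: resolve_jobject, used earlier when unpacking an object result, turns the handle returned by the native function back into an oop.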
++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ mv(c_rarg0, xthread); ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); ++ __ jalr(x1, t0, offset); + -+ map->set_oop(VMRegImpl::stack2reg(oop_slot)); -+ // Store oop in handle area, may be NULL -+ __ sd(rOop, Address(sp, offset)); -+ if (is_receiver) { -+ *receiver_offset = offset; -+ } ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); + -+ //rOop maybe the same as rHandle -+ if (rOop == rHandle) { -+ Label isZero; -+ __ beqz(rOop, isZero); -+ __ la(rHandle, Address(sp, offset)); -+ __ bind(isZero); -+ } else { -+ Label notZero2; -+ __ la(rHandle, Address(sp, offset)); -+ __ bnez(rOop, notZero2); -+ __ mv(rHandle, zr); -+ __ bind(notZero2); -+ } ++ __ j(safepoint_in_progress_done); ++ __ block_comment("} safepoint"); + } + -+ // If arg is on the stack then place it otherwise it is already in correct reg. -+ if (dst.first()->is_stack()) { -+ __ sd(rHandle, Address(sp, reg2offset_out(dst.first()))); -+ } -+} ++ // SLOW PATH dtrace support ++ { ++ __ block_comment("dtrace entry {"); ++ __ bind(dtrace_method_entry); + -+// A float arg may have to do float reg int reg conversion -+static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { -+ assert(src.first()->is_stack() && dst.first()->is_stack() || -+ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); -+ assert_cond(masm != NULL); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ __ lwu(t0, Address(fp, reg2offset_in(src.first()))); -+ __ sw(t0, Address(sp, reg2offset_out(dst.first()))); -+ } else if (dst.first()->is_Register()) { -+ __ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } else if (src.first() != dst.first()) { -+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { -+ __ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+} -+ -+// A long move -+static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { -+ assert_cond(masm != NULL); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ // stack to stack -+ __ ld(t0, Address(fp, reg2offset_in(src.first()))); -+ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ // stack to reg -+ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } -+ } else if (dst.first()->is_stack()) { -+ // reg to stack -+ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ if (dst.first() != src.first()) { -+ __ mv(dst.first()->as_Register(), src.first()->as_Register()); -+ } -+ } -+} -+ -+// A double move -+static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { -+ assert(src.first()->is_stack() && dst.first()->is_stack() || -+ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); -+ assert_cond(masm != NULL); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ __ ld(t0, Address(fp, reg2offset_in(src.first()))); -+ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); -+ } else if (dst.first()-> is_Register()) { -+ __ 
ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } else if (src.first() != dst.first()) { -+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { -+ __ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+} ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? + -+void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { -+ assert_cond(masm != NULL); -+ // We always ignore the frame_slots arg and just use the space just below frame pointer -+ // which by this time is free to use -+ switch (ret_type) { -+ case T_FLOAT: -+ __ fsw(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_DOUBLE: -+ __ fsd(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_VOID: break; -+ default: { -+ __ sd(x10, Address(fp, -3 * wordSize)); -+ } ++ save_args(masm, total_c_args, c_arg, out_regs); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ xthread, c_rarg1); ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ __ j(dtrace_method_entry_done); ++ __ block_comment("} dtrace entry"); + } -+} + -+void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { -+ assert_cond(masm != NULL); -+ // We always ignore the frame_slots arg and just use the space just below frame pointer -+ // which by this time is free to use -+ switch (ret_type) { -+ case T_FLOAT: -+ __ flw(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_DOUBLE: -+ __ fld(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_VOID: break; -+ default: { -+ __ ld(x10, Address(fp, -3 * wordSize)); -+ } ++ { ++ __ block_comment("dtrace exit {"); ++ __ bind(dtrace_method_exit); ++ save_native_result(masm, ret_type, stack_slots); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ xthread, c_rarg1); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ j(dtrace_method_exit_done); ++ __ block_comment("} dtrace exit"); + } -+} + -+static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { -+ assert_cond(masm != NULL && args != NULL); -+ RegSet x; -+ for ( int i = first_arg ; i < arg_count ; i++ ) { -+ if (args[i].first()->is_Register()) { -+ x = x + args[i].first()->as_Register(); -+ } else if (args[i].first()->is_FloatRegister()) { -+ __ addi(sp, sp, -2 * wordSize); -+ __ fsd(args[i].first()->as_FloatRegister(), Address(sp, 0)); -+ } -+ } -+ __ push_reg(x, sp); -+} ++ __ flush(); + -+static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { -+ assert_cond(masm != NULL && args != NULL); -+ RegSet x; -+ for ( int i = first_arg ; i < arg_count ; i++ ) { -+ if (args[i].first()->is_Register()) { -+ x = x + args[i].first()->as_Register(); -+ } else { -+ ; -+ } -+ } -+ __ pop_reg(x, sp); -+ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { -+ if (args[i].first()->is_Register()) { -+ ; -+ } else if (args[i].first()->is_FloatRegister()) { -+ __ fld(args[i].first()->as_FloatRegister(), Address(sp, 0)); -+ __ add(sp, sp, 2 * wordSize); -+ } -+ } ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / 
VMRegImpl::slots_per_word, ++ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ assert(nm != NULL, "create native nmethod fail!"); ++ return nm; +} + -+static void rt_call(MacroAssembler* masm, address dest) { -+ assert_cond(masm != NULL); -+ CodeBlob *cb = CodeCache::find_blob(dest); -+ if (cb) { -+ __ far_call(RuntimeAddress(dest)); -+ } else { -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(dest), offset); -+ __ jalr(x1, t0, offset); ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ assert(callee_locals >= callee_parameters, ++ "test and remove; got more parms than locals"); ++ if (callee_locals < callee_parameters) { ++ return 0; // No adjustment for negative locals + } ++ int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++ // diff is counted in stack words ++ return align_up(diff, 2); +} + -+static void verify_oop_args(MacroAssembler* masm, -+ const methodHandle& method, -+ const BasicType* sig_bt, -+ const VMRegPair* regs) { -+ const Register temp_reg = x9; // not part of any compiled calling seq -+ if (VerifyOops) { -+ for (int i = 0; i < method->size_of_parameters(); i++) { -+ if (sig_bt[i] == T_OBJECT || -+ sig_bt[i] == T_ARRAY) { -+ VMReg r = regs[i].first(); -+ assert(r->is_valid(), "bad oop arg"); -+ if (r->is_stack()) { -+ __ ld(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); -+ __ verify_oop(temp_reg); -+ } else { -+ __ verify_oop(r->as_Register()); -+ } -+ } -+ } -+ } -+} ++//------------------------------generate_deopt_blob---------------------------- ++void SharedRuntime::generate_deopt_blob() { ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ int pad = 0; ++ CodeBuffer buffer("deopt_blob", 2048 + pad, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ int frame_size_in_words = -1; ++ OopMap* map = NULL; ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(masm != NULL && oop_maps != NULL); ++ RegisterSaver reg_saver; + -+static void gen_special_dispatch(MacroAssembler* masm, -+ const methodHandle& method, -+ const BasicType* sig_bt, -+ const VMRegPair* regs) { -+ verify_oop_args(masm, method, sig_bt, regs); -+ vmIntrinsics::ID iid = method->intrinsic_id(); ++ // ------------- ++ // This code enters when returning to a de-optimized nmethod. A return ++ // address has been pushed on the the stack, and return values are in ++ // registers. ++ // If we are doing a normal deopt then we were called from the patched ++ // nmethod from the point we returned to the nmethod. So the return ++ // address on the stack is wrong by NativeCall::instruction_size ++ // We will adjust the value so it looks like we have the original return ++ // address on the stack (like when we eagerly deoptimized). ++ // In the case of an exception pending when deoptimizing, we enter ++ // with a return address on the stack that points after the call we patched ++ // into the exception handler. We have the following register state from, ++ // e.g., the forward exception stub (see stubGenerator_riscv.cpp). ++ // x10: exception oop ++ // x9: exception handler ++ // x13: throwing pc ++ // So in this case we simply jam x13 into the useless return address and ++ // the stack looks just like we want. 
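Deoptimization::last_frame_adjust above grows a c2i activation by the number of interpreter words the callee's extra locals need, rounded up to an even word count. A quick model of that arithmetic follows; it is an illustration only and assumes Interpreter::stackElementWords == 1, i.e. one machine word per interpreter stack element on a 64-bit port.

    // Model of last_frame_adjust above (not HotSpot code; stackElementWords assumed 1).
    #include <cstdio>

    static int last_frame_adjust_model(int callee_parameters, int callee_locals) {
      const int stackElementWords = 1;
      if (callee_locals < callee_parameters) {
        return 0;                                   // no adjustment for "negative" locals
      }
      int diff = (callee_locals - callee_parameters) * stackElementWords;
      return (diff + 1) & ~1;                       // align_up(diff, 2)
    }

    int main() {
      // A callee with 3 parameters and 7 locals needs 4 extra words; with 8 locals it
      // needs 5, which rounds up to 6 so the adjusted frame stays 16-byte aligned.
      printf("%d %d\n", last_frame_adjust_model(3, 7), last_frame_adjust_model(3, 8));
    }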
++ // ++ // At this point we need to de-opt. We save the argument return ++ // registers. We call the first C routine, fetch_unroll_info(). This ++ // routine captures the return values and returns a structure which ++ // describes the current frame size and the sizes of all replacement frames. ++ // The current frame is compiled code and may contain many inlined ++ // functions, each with their own JVM state. We pop the current frame, then ++ // push all the new frames. Then we call the C routine unpack_frames() to ++ // populate these frames. Finally unpack_frames() returns us the new target ++ // address. Notice that callee-save registers are BLOWN here; they have ++ // already been captured in the vframeArray at the time the return PC was ++ // patched. ++ address start = __ pc(); ++ Label cont; + -+ // Now write the args into the outgoing interpreter space -+ bool has_receiver = false; -+ Register receiver_reg = noreg; -+ int member_arg_pos = -1; -+ Register member_reg = noreg; -+ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); -+ if (ref_kind != 0) { -+ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument -+ member_reg = x9; // known to be free at this point -+ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); -+ } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { -+ has_receiver = true; -+ } else { -+ fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); -+ } ++ // Prolog for non exception case! + -+ if (member_reg != noreg) { -+ // Load the member_arg into register, if necessary. -+ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); -+ VMReg r = regs[member_arg_pos].first(); -+ if (r->is_stack()) { -+ __ ld(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); -+ } else { -+ // no data motion is needed -+ member_reg = r->as_Register(); -+ } -+ } ++ // Save everything in sight. ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ if (has_receiver) { -+ // Make sure the receiver is loaded into a register. -+ assert(method->size_of_parameters() > 0, "oob"); -+ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); -+ VMReg r = regs[0].first(); -+ assert(r->is_valid(), "bad receiver arg"); -+ if (r->is_stack()) { -+ // Porting note: This assumes that compiled calling conventions always -+ // pass the receiver oop in a register. If this is not true on some -+ // platform, pick a temp and load the receiver from stack. -+ fatal("receiver always in a register"); -+ receiver_reg = x12; // known to be free at this point -+ __ ld(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); -+ } else { -+ // no data motion is needed -+ receiver_reg = r->as_Register(); -+ } -+ } ++ // Normal deoptimization. Save exec mode for unpack_frames. ++ __ mvw(xcpool, Deoptimization::Unpack_deopt); // callee-saved ++ __ j(cont); + -+ // Figure out which address we are really jumping to: -+ MethodHandles::generate_method_handle_dispatch(masm, iid, -+ receiver_reg, member_reg, /*for_compiler_entry:*/ true); -+} ++ int reexecute_offset = __ pc() - start; + -+// --------------------------------------------------------------------------- -+// Generate a native wrapper for a given method. 
The method takes arguments -+// in the Java compiled code convention, marshals them to the native -+// convention (handlizes oops, etc), transitions to native, makes the call, -+// returns to java state (possibly blocking), unhandlizes any result and -+// returns. -+// -+// Critical native functions are a shorthand for the use of -+// GetPrimtiveArrayCritical and disallow the use of any other JNI -+// functions. The wrapper is expected to unpack the arguments before -+// passing them to the callee and perform checks before and after the -+// native call to ensure that they GCLocker -+// lock_critical/unlock_critical semantics are followed. Some other -+// parts of JNI setup are skipped like the tear down of the JNI handle -+// block and the check for pending exceptions it's impossible for them -+// to be thrown. -+// -+// They are roughly structured like this: -+// if (GCLocker::needs_gc()) SharedRuntime::block_for_jni_critical() -+// tranistion to thread_in_native -+// unpack arrray arguments and call native entry point -+// check for safepoint in progress -+// check if any thread suspend flags are set -+// call into JVM and possible unlock the JNI critical -+// if a GC was suppressed while in the critical native. -+// transition back to thread_in_Java -+// return to caller -+// -+nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, -+ const methodHandle& method, -+ int compile_id, -+ BasicType* in_sig_bt, -+ VMRegPair* in_regs, -+ BasicType ret_type) { -+ if (method->is_method_handle_intrinsic()) { -+ vmIntrinsics::ID iid = method->intrinsic_id(); -+ intptr_t start = (intptr_t)__ pc(); -+ int vep_offset = ((intptr_t)__ pc()) - start; ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at + -+ // First instruction must be a nop as it may need to be patched on deoptimisation -+ __ nop(); -+ gen_special_dispatch(masm, -+ method, -+ in_sig_bt, -+ in_regs); -+ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period -+ __ flush(); -+ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually -+ return nmethod::new_native_nmethod(method, -+ compile_id, -+ masm->code(), -+ vep_offset, -+ frame_complete, -+ stack_slots / VMRegImpl::slots_per_word, -+ in_ByteSize(-1), -+ in_ByteSize(-1), -+ (OopMapSet*)NULL); -+ } -+ address native_func = method->native_function(); -+ assert(native_func != NULL, "must have function"); ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ // An OopMap for lock (and class if static) -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); -+ intptr_t start = (intptr_t)__ pc(); ++ __ mvw(xcpool, Deoptimization::Unpack_reexecute); // callee-saved ++ __ j(cont); + -+ // We have received a description of where all the java arg are located -+ // on entry to the wrapper. We need to convert these args to where -+ // the jni function will expect them. To figure out where they go -+ // we convert the java signature to a C signature by inserting -+ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ int exception_offset = __ pc() - start; + -+ const int total_in_args = method->size_of_parameters(); -+ int total_c_args = total_in_args + (method->is_static() ? 
2 : 1); ++ // Prolog for exception case + -+ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); -+ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); -+ BasicType* in_elem_bt = NULL; ++ // all registers are dead at this entry point, except for x10, and ++ // x13 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. + -+ int argc = 0; -+ out_sig_bt[argc++] = T_ADDRESS; -+ if (method->is_static()) { -+ out_sig_bt[argc++] = T_OBJECT; -+ } ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); + -+ for (int i = 0; i < total_in_args ; i++) { -+ out_sig_bt[argc++] = in_sig_bt[i]; -+ } ++ int exception_in_tls_offset = __ pc() - start; + -+ // Now figure out where the args must be stored and how much stack space -+ // they require. -+ int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ // new implementation because exception oop is now passed in JavaThread + -+ // Compute framesize for the wrapper. We need to handlize all oops in -+ // incoming registers ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) + -+ // Calculate the total number of stack slots we will need. ++ // The return address pushed by save_live_registers will be patched ++ // later with the throwing pc. The correct value is not available ++ // now because loading it from memory would destroy registers. + -+ // First count the abi requirement plus all of the outgoing args -+ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ // NB: The SP at this point must be the SP of the method that is ++ // being deoptimized. Deoptimization assumes that the frame created ++ // here by save_live_registers is immediately below the method's SP. ++ // This is a somewhat fragile mechanism. + -+ // Now the space for the inbound oop handle area -+ int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers ++ // Save everything in sight. ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ int oop_handle_offset = stack_slots; -+ stack_slots += total_save_slots; ++ // Now it is safe to overwrite any register + -+ // Now any space we need for handlizing a klass if static method ++ // Deopt during an exception. Save exec mode for unpack_frames. ++ __ li(xcpool, Deoptimization::Unpack_exception); // callee-saved + -+ int klass_slot_offset = 0; -+ int klass_offset = -1; -+ int lock_slot_offset = 0; -+ bool is_static = false; ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. 
Then clear the field in JavaThread + -+ if (method->is_static()) { -+ klass_slot_offset = stack_slots; -+ stack_slots += VMRegImpl::slots_per_word; -+ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; -+ is_static = true; -+ } ++ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ __ sd(x13, Address(fp, frame::return_addr_offset * wordSize)); ++ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); + -+ // Plus a lock if needed ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ verify_oop(x10); + -+ if (method->is_synchronized()) { -+ lock_slot_offset = stack_slots; -+ stack_slots += VMRegImpl::slots_per_word; -+ } ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, no_pending_exception); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif + -+ // Now a place (+2) to save return values or temp during shuffling -+ // + 4 for return address (which we own) and saved fp -+ stack_slots += 6; ++ __ bind(cont); + -+ // Ok The space we have allocated will look like: -+ // -+ // -+ // FP-> | | -+ // | 2 slots (ra) | -+ // | 2 slots (fp) | -+ // |---------------------| -+ // | 2 slots for moves | -+ // |---------------------| -+ // | lock box (if sync) | -+ // |---------------------| <- lock_slot_offset -+ // | klass (if static) | -+ // |---------------------| <- klass_slot_offset -+ // | oopHandle area | -+ // |---------------------| <- oop_handle_offset (8 java arg registers) -+ // | outbound memory | -+ // | based arguments | -+ // | | -+ // |---------------------| -+ // | | -+ // SP-> | out_preserved_slots | -+ // ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. + // ++ // UnrollBlock* fetch_unroll_info(JavaThread* thread) + ++ // fetch_unroll_info needs to call last_java_frame(). + -+ // Now compute actual number of stack words we need rounding to make -+ // stack properly aligned. -+ stack_slots = align_up(stack_slots, StackAlignmentInSlots); -+ -+ int stack_size = stack_slots * VMRegImpl::stack_slot_size; -+ -+ // First thing make an ic check to see if we should even be here -+ -+ // We are free to use all registers as temps without saving them and -+ // restoring them except fp. fp is the only callee save register -+ // as far as the interpreter and the compiler(s) are concerned. 
-+ -+ -+ const Register ic_reg = t1; -+ const Register receiver = j_rarg0; -+ -+ Label hit; -+ Label exception_pending; -+ -+ assert_different_registers(ic_reg, receiver, t0); -+ __ verify_oop(receiver); -+ __ cmp_klass(receiver, ic_reg, t0, hit); -+ -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); -+ -+ // Verified entry point must be aligned -+ __ align(8); -+ -+ __ bind(hit); -+ -+ int vep_offset = ((intptr_t)__ pc()) - start; ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(t0, Address(xthread, ++ JavaThread::last_Java_fp_offset())); ++ __ beqz(t0, L); ++ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ __ mv(c_rarg0, xthread); ++ __ mv(c_rarg1, xcpool); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); + -+ // If we have to make this method not-entrant we'll overwrite its -+ // first instruction with a jump. -+ __ nop(); ++ // Need to have an oopmap that tells fetch_unroll_info where to ++ // find any register it might need. ++ oop_maps->add_gc_map(__ pc() - start, map); + -+ if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { -+ Label L_skip_barrier; -+ __ mov_metadata(t1, method->method_holder()); // InstanceKlass* -+ __ clinit_barrier(t1, t0, &L_skip_barrier); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ __ reset_last_Java_frame(false); + -+ __ bind(L_skip_barrier); -+ } ++ // Load UnrollBlock* into x15 ++ __ mv(x15, x10); + -+ // Generate stack overflow check -+ __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size())); ++ __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); ++ Label noException; ++ __ li(t0, Deoptimization::Unpack_exception); ++ __ bne(xcpool, t0, noException); // Was exception pending? ++ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); + -+ // Generate a new frame for the wrapper. -+ __ enter(); -+ // -2 because return address is already present and so is saved fp -+ __ sub(sp, sp, stack_size - 2 * wordSize); ++ __ verify_oop(x10); + -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ assert_cond(bs != NULL); -+ bs->nmethod_entry_barrier(masm); ++ // Overwrite the result registers with the exception results. ++ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+ // Frame is now completed as far as size and linkage. -+ int frame_complete = ((intptr_t)__ pc()) - start; ++ __ bind(noException); + -+ // We use x18 as the oop handle for the receiver/klass -+ // It is callee save so it survives the call to native ++ // Only register save data is on the stack. ++ // Now restore the result registers. Everything else is either dead ++ // or captured in the vframeArray. 
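Editorial note on the comment above: after fetch_unroll_info() only the Java return values (integer result in x10, float result in f10) still matter, so the code that follows reads back just those two slots of the register-save area before the whole area is popped; everything else is rebuilt from the vframeArray. A minimal stand-alone sketch of that idea — the 64-slot area and the slot indices kX10Slot/kF10Slot are made up for illustration and are not HotSpot's RegisterSaver layout:

#include <array>
#include <cstdint>
#include <cstdio>

constexpr int kX10Slot = 10;   // made-up slot index for the sketch
constexpr int kF10Slot = 42;   // made-up slot index for the sketch

int main() {
  // Stands in for the area built by save_live_registers(): one word per saved register.
  std::array<std::uint64_t, 64> save_area{};
  save_area[kX10Slot] = 0x1234;                 // integer result (or exception oop) spilled into x10's slot
  save_area[kF10Slot] = 0x3FF0000000000000ULL;  // bit pattern of 1.0 in f10's slot

  std::uint64_t x10 = save_area[kX10Slot];      // mirrors: ld  x10, reg_offset_in_bytes(x10)(sp)
  std::uint64_t f10 = save_area[kF10Slot];      // mirrors: fld f10, freg_offset_in_bytes(f10)(sp)
  std::printf("restored x10=0x%llx, f10 bits=0x%llx\n",
              static_cast<unsigned long long>(x10),
              static_cast<unsigned long long>(f10));
  // The remaining slots are simply discarded when sp is bumped past the save area.
  return 0;
}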
+ -+ const Register oop_handle_reg = x18; ++ // Restore fp result register ++ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ // Restore integer result register ++ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+ // -+ // We immediately shuffle the arguments so that any vm call we have to -+ // make from here on out (sync slow path, jvmti, etc.) we will have -+ // captured the oops from our caller and have a valid oopMap for -+ // them. ++ // Pop all of the register save area off the stack ++ __ add(sp, sp, frame_size_in_words * wordSize); + -+ // ----------------- -+ // The Grand Shuffle ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. + -+ // The Java calling convention is either equal (linux) or denser (win64) than the -+ // c calling convention. However the because of the jni_env argument the c calling -+ // convention always has at least one more (and two for static) arguments than Java. -+ // Therefore if we move the args from java -> c backwards then we will never have -+ // a register->register conflict and we don't have to build a dependency graph -+ // and figure out how to break any cycles. ++ // Pop all the frames we must move/replace. + // -+ -+ // Record esp-based slot for receiver on stack for non-static methods -+ int receiver_offset = -1; -+ -+ // This is a trick. We double the stack slots so we can claim -+ // the oops in the caller's frame. Since we are sure to have -+ // more args than the caller doubling is enough to make -+ // sure we can capture all the incoming oop args from the -+ // caller. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). + // -+ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); -+ assert_cond(map != NULL); ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. + -+ int float_args = 0; -+ int int_args = 0; ++ // Pop deoptimized frame ++ __ lwu(x12, Address(x15, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); ++ __ sub(x12, x12, 2 * wordSize); ++ __ add(sp, sp, x12); ++ __ ld(fp, Address(sp, 0)); ++ __ ld(ra, Address(sp, wordSize)); ++ __ addi(sp, sp, 2 * wordSize); ++ // RA should now be the return address to the caller (3) + +#ifdef ASSERT -+ bool reg_destroyed[RegisterImpl::number_of_registers]; -+ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; -+ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { -+ reg_destroyed[r] = false; -+ } -+ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { -+ freg_destroyed[f] = false; -+ } ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. 
++ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(x9, x12); ++#endif ++ // Load address of array of frame pcs into x12 ++ __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); + -+#endif /* ASSERT */ ++ // Load address of array of frame sizes into x14 ++ __ ld(x14, Address(x15, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); + -+ // For JNI natives the incoming and outgoing registers are offset upwards. -+ GrowableArray arg_order(2 * total_in_args); -+ VMRegPair tmp_vmreg; -+ tmp_vmreg.set2(x9->as_VMReg()); ++ // Load counter into x13 ++ __ lwu(x13, Address(x15, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); + -+ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { -+ arg_order.push(i); -+ arg_order.push(c_arg); -+ } ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. + -+ int temploc = -1; -+ for (int ai = 0; ai < arg_order.length(); ai += 2) { -+ int i = arg_order.at(ai); -+ int c_arg = arg_order.at(ai + 1); -+ __ block_comment(err_msg("mv %d -> %d", i, c_arg)); -+ assert(c_arg != -1 && i != -1, "wrong order"); -+#ifdef ASSERT -+ if (in_regs[i].first()->is_Register()) { -+ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); -+ } else if (in_regs[i].first()->is_FloatRegister()) { -+ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); -+ } -+ if (out_regs[c_arg].first()->is_Register()) { -+ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; -+ } else if (out_regs[c_arg].first()->is_FloatRegister()) { -+ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; -+ } -+#endif /* ASSERT */ -+ switch (in_sig_bt[i]) { -+ case T_ARRAY: -+ case T_OBJECT: -+ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], -+ ((i == 0) && (!is_static)), -+ &receiver_offset); -+ int_args++; -+ break; -+ case T_VOID: -+ break; ++ const Register sender_sp = x16; + -+ case T_FLOAT: -+ float_move(masm, in_regs[i], out_regs[c_arg]); -+ float_args++; -+ break; ++ __ mv(sender_sp, sp); ++ __ lwu(x9, Address(x15, ++ Deoptimization::UnrollBlock:: ++ caller_adjustment_offset_in_bytes())); ++ __ sub(sp, sp, x9); + -+ case T_DOUBLE: -+ assert( i + 1 < total_in_args && -+ in_sig_bt[i + 1] == T_VOID && -+ out_sig_bt[c_arg + 1] == T_VOID, "bad arg list"); -+ double_move(masm, in_regs[i], out_regs[c_arg]); -+ float_args++; -+ break; ++ // Push interpreter frames in a loop ++ __ li(t0, 0xDEADDEAD); // Make a recognizable pattern ++ __ mv(t1, t0); ++ Label loop; ++ __ bind(loop); ++ __ ld(x9, Address(x14, 0)); // Load frame size ++ __ addi(x14, x14, wordSize); ++ __ sub(x9, x9, 2 * wordSize); // We'll push pc and fp by hand ++ __ ld(ra, Address(x12, 0)); // Load pc ++ __ addi(x12, x12, wordSize); ++ __ enter(); // Save old & set new fp ++ __ sub(sp, sp, x9); // Prolog ++ // This value is corrected by layout_activation_impl ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable ++ __ mv(sender_sp, sp); // Pass sender_sp to next frame ++ __ addi(x13, x13, -1); // 
Decrement counter ++ __ bnez(x13, loop); + -+ case T_LONG : -+ long_move(masm, in_regs[i], out_regs[c_arg]); -+ int_args++; -+ break; ++ // Re-push self-frame ++ __ ld(ra, Address(x12)); ++ __ enter(); + -+ case T_ADDRESS: -+ assert(false, "found T_ADDRESS in java args"); -+ break; ++ // Allocate a full sized register save area. We subtract 2 because ++ // enter() just pushed 2 words ++ __ sub(sp, sp, (frame_size_in_words - 2) * wordSize); + -+ default: -+ move32_64(masm, in_regs[i], out_regs[c_arg]); -+ int_args++; -+ } -+ } ++ // Restore frame locals after moving the frame ++ __ fsd(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+ // point c_arg at the first arg that is already loaded in case we -+ // need to spill before we call out -+ int c_arg = total_c_args - total_in_args; ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ // ++ // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) + -+ // Pre-load a static method's oop into c_rarg1. -+ if (method->is_static()) { ++ // Use fp because the frames look interpreted now ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); + -+ // load oop into a register -+ __ movoop(c_rarg1, -+ JNIHandles::make_local(method->method_holder()->java_mirror()), -+ /*immediate*/true); ++ __ mv(c_rarg0, xthread); ++ __ mv(c_rarg1, xcpool); // second arg: exec_mode ++ offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); ++ __ jalr(x1, t0, offset); + -+ // Now handlize the static class mirror it's known not-null. -+ __ sd(c_rarg1, Address(sp, klass_offset)); -+ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ // Set an oopmap for the call site ++ // Use the same PC we used for the last java frame ++ oop_maps->add_gc_map(the_pc - start, ++ new OopMap(frame_size_in_words, 0)); + -+ // Now get the handle -+ __ la(c_rarg1, Address(sp, klass_offset)); -+ // and protect the arg if we must spill -+ c_arg--; -+ } ++ // Clear fp AND pc ++ __ reset_last_Java_frame(true); + -+ // Change state to native (we save the return address in the thread, since it might not -+ // be pushed on the stack when we do a stack traversal). -+ // We use the same pc/oopMap repeatedly when we call out ++ // Collect return values ++ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+ Label native_return; -+ __ set_last_Java_frame(sp, noreg, native_return, t0); ++ // Pop self-frame. 
++ __ leave(); // Epilog + -+ Label dtrace_method_entry, dtrace_method_entry_done; -+ { -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); -+ __ lbu(t0, Address(t0, offset)); -+ __ addw(t0, t0, zr); -+ __ bnez(t0, dtrace_method_entry); -+ __ bind(dtrace_method_entry_done); -+ } ++ // Jump to interpreter ++ __ ret(); + -+ // RedefineClasses() tracing support for obsolete method entry -+ if (log_is_enabled(Trace, redefine, class, obsolete)) { -+ // protect the args we've loaded -+ save_args(masm, total_c_args, c_arg, out_regs); -+ __ mov_metadata(c_rarg1, method()); -+ __ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), -+ xthread, c_rarg1); -+ restore_args(masm, total_c_args, c_arg, out_regs); -+ } ++ // Make sure all code is generated ++ masm->flush(); + -+ // Lock a synchronized method ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ assert(_deopt_blob != NULL, "create deoptimization blob fail!"); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++} + -+ // Register definitions used by locking and unlocking ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} + -+ const Register swap_reg = x10; -+ const Register obj_reg = x9; // Will contain the oop -+ const Register lock_reg = x30; // Address of compiler lock object (BasicLock) -+ const Register old_hdr = x30; // value of old header at unlock time -+ const Register tmp = ra; ++#ifdef COMPILER2 ++//------------------------------generate_uncommon_trap_blob-------------------- ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); + -+ Label slow_path_lock; -+ Label lock_done; ++ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); + -+ if (method->is_synchronized()) { ++ address start = __ pc(); + -+ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ // Push self-frame. We get here with a return address in RA ++ // and sp should be 16 byte aligned ++ // push fp and retaddr by hand ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(ra, Address(sp, wordSize)); ++ __ sd(fp, Address(sp, 0)); ++ // we don't expect an arg reg save area ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ // compiler left unloaded_class_index in j_rarg0 move to where the ++ // runtime expects it. ++ __ addiw(c_rarg1, j_rarg0, 0); + -+ // Get the handle (the 2nd argument) -+ __ mv(oop_handle_reg, c_rarg1); ++ // we need to set the past SP to the stack pointer of the stub frame ++ // and the pc to the address where this runtime call will return ++ // although actually any pc in this code blob will do). ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); + -+ // Get address of the box ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ // ++ // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode) ++ // ++ // n.b. 
3 gp args, 0 fp args, integral return type + -+ __ la(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ __ mv(c_rarg0, xthread); ++ __ mvw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap); ++ int32_t offset = 0; ++ __ la_patchable(t0, ++ RuntimeAddress(CAST_FROM_FN_PTR(address, ++ Deoptimization::uncommon_trap)), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); + -+ // Load the oop from the handle -+ __ ld(obj_reg, Address(oop_handle_reg, 0)); ++ // Set an oopmap for the call site ++ OopMapSet* oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); ++ assert_cond(oop_maps != NULL && map != NULL); + -+ if (!UseHeavyMonitors) { -+ // Load (object->mark() | 1) into swap_reg % x10 -+ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ __ ori(swap_reg, t0, 1); ++ // location of fp is known implicitly by the frame sender code + -+ // Save (object->mark() | 1) into BasicLock's displaced header -+ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++ oop_maps->add_gc_map(__ pc() - start, map); + -+ // src -> dest if dest == x10 else x10 <- dest -+ { -+ Label here; -+ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); -+ } ++ __ reset_last_Java_frame(false); + -+ // Test if the oopMark is an obvious stack pointer, i.e., -+ // 1) (mark & 3) == 0, and -+ // 2) sp <= mark < mark + os::pagesize() -+ // These 3 tests can be done by evaluating the following -+ // expression: ((mark - sp) & (3 - os::vm_page_size())), -+ // assuming both stack pointer and pagesize have their -+ // least significant 2 bits clear. -+ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg -+ -+ __ sub(swap_reg, swap_reg, sp); -+ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); -+ -+ // Save the test result, for recursive case, the result is zero -+ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -+ __ bnez(swap_reg, slow_path_lock); -+ } else { -+ __ j(slow_path_lock); -+ } ++ // move UnrollBlock* into x14 ++ __ mv(x14, x10); + -+ // Slow path will re-enter here -+ __ bind(lock_done); ++#ifdef ASSERT ++ { Label L; ++ __ lwu(t0, Address(x14, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); ++ __ mvw(t1, Deoptimization::Unpack_uncommon_trap); ++ __ beq(t0, t1, L); ++ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); ++ __ bind(L); + } ++#endif + ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). + -+ // Finally just about ready to make the JNI call ++ __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog! 
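Editorial note on the epilog above: the frame size is counted in 32-bit VMReg stack slots, so it is converted to a byte count with "<< LogBytesPerInt" before being added to sp. A small arithmetic sketch of that conversion; the slot count of 4 and the shift value of 2 are assumptions for illustration, not values read from the HotSpot headers:

#include <cassert>

int main() {
  const int LogBytesPerInt = 2;       // one 32-bit stack slot = 4 bytes (assumed)
  const int framesize_in_slots = 4;   // assumed: saved fp + ra on RV64 = 2 words = 4 slots
  const int framesize_in_bytes = framesize_in_slots << LogBytesPerInt;

  assert(framesize_in_bytes == 16);       // matches the 2 * wordSize pushed by hand at blob entry
  assert(framesize_in_bytes % 16 == 0);   // sp stays 16-byte aligned, as asserted at the top of the blob
  return 0;
}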
+ -+ // get JNIEnv* which is first argument to native -+ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); ++ // Pop deoptimized frame (int) ++ __ lwu(x12, Address(x14, ++ Deoptimization::UnrollBlock:: ++ size_of_deoptimized_frame_offset_in_bytes())); ++ __ sub(x12, x12, 2 * wordSize); ++ __ add(sp, sp, x12); ++ __ ld(fp, sp, 0); ++ __ ld(ra, sp, wordSize); ++ __ addi(sp, sp, 2 * wordSize); ++ // RA should now be the return address to the caller (3) frame + -+ // Now set thread in native -+ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); -+ __ mv(t0, _thread_in_native); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sw(t0, Address(t1)); ++#ifdef ASSERT ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. ++ __ lwu(x11, Address(x14, ++ Deoptimization::UnrollBlock:: ++ total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(x11, x12); ++#endif + -+ rt_call(masm, native_func); ++ // Load address of array of frame pcs into x12 (address*) ++ __ ld(x12, Address(x14, ++ Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); + -+ __ bind(native_return); ++ // Load address of array of frame sizes into x15 (intptr_t*) ++ __ ld(x15, Address(x14, ++ Deoptimization::UnrollBlock:: ++ frame_sizes_offset_in_bytes())); + -+ intptr_t return_pc = (intptr_t) __ pc(); -+ oop_maps->add_gc_map(return_pc - start, map); ++ // Counter ++ __ lwu(x13, Address(x14, ++ Deoptimization::UnrollBlock:: ++ number_of_frames_offset_in_bytes())); // (int) + -+ // Unpack native results. -+ if (ret_type != T_OBJECT && ret_type != T_ARRAY) { -+ __ cast_primitive_type(ret_type, x10); -+ } ++ // Now adjust the caller's stack to make up for the extra locals but ++ // record the original sp so that we can save it in the skeletal ++ // interpreter frame and the stack walking of interpreter_sender ++ // will get the unextended sp value and not the "real" sp value. + -+ Label safepoint_in_progress, safepoint_in_progress_done; -+ Label after_transition; ++ const Register sender_sp = t1; // temporary register + -+ // Switch thread to "native transition" state before reading the synchronization state. -+ // This additional state is necessary because reading and testing the synchronization -+ // state is not atomic w.r.t. GC, as this scenario demonstrates: -+ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. -+ // VM thread changes sync state to synchronizing and suspends threads for GC. -+ // Thread A is resumed to finish this native method, but doesn't block here since it -+ // didn't see any synchronization is progress, and escapes. 
-+ __ mv(t0, _thread_in_native_trans); ++ __ lwu(x11, Address(x14, ++ Deoptimization::UnrollBlock:: ++ caller_adjustment_offset_in_bytes())); // (int) ++ __ mv(sender_sp, sp); ++ __ sub(sp, sp, x11); + -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld(x11, Address(x15, 0)); // Load frame size ++ __ sub(x11, x11, 2 * wordSize); // We'll push pc and fp by hand ++ __ ld(ra, Address(x12, 0)); // Save return address ++ __ enter(); // and old fp & set new fp ++ __ sub(sp, sp, x11); // Prolog ++ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable ++ // This value is corrected by layout_activation_impl ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ mv(sender_sp, sp); // Pass sender_sp to next frame ++ __ add(x15, x15, wordSize); // Bump array pointer (sizes) ++ __ add(x12, x12, wordSize); // Bump array pointer (pcs) ++ __ subw(x13, x13, 1); // Decrement counter ++ __ bgtz(x13, loop); ++ __ ld(ra, Address(x12, 0)); // save final return address ++ // Re-push self-frame ++ __ enter(); // & old fp & set new fp + -+ // Force this write out before the read below -+ __ membar(MacroAssembler::AnyAny); ++ // Use fp because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); + -+ // check for safepoint operation in progress and/or pending suspend requests -+ { -+ // We need an acquire here to ensure that any subsequent load of the -+ // global SafepointSynchronize::_state flag is ordered after this load -+ // of the thread-local polling word. We don't want this poll to -+ // return false (i.e. not safepointing) and a later poll of the global -+ // SafepointSynchronize::_state spuriously to return true. -+ // This is to avoid a race when we're in a native->Java transition -+ // racing the code which wakes up from a safepoint. -+ -+ __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */); -+ __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); -+ __ bnez(t0, safepoint_in_progress); -+ __ bind(safepoint_in_progress_done); -+ } ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ // ++ // BasicType unpack_frames(JavaThread* thread, int exec_mode) ++ // + -+ // change thread state -+ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); -+ __ mv(t0, _thread_in_Java); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sw(t0, Address(t1)); -+ __ bind(after_transition); -+ -+ Label reguard; -+ Label reguard_done; -+ __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); -+ __ mv(t1, StackOverflow::stack_guard_yellow_reserved_disabled); -+ __ beq(t0, t1, reguard); -+ __ bind(reguard_done); ++ // n.b. 
2 gp args, 0 fp args, integral return type + -+ // native result if any is live ++ // sp should already be aligned ++ __ mv(c_rarg0, xthread); ++ __ mvw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap); ++ offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); ++ __ jalr(x1, t0, offset); + -+ // Unlock -+ Label unlock_done; -+ Label slow_path_unlock; -+ if (method->is_synchronized()) { ++ // Set an oopmap for the call site ++ // Use the same PC we used for the last java frame ++ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); + -+ // Get locked oop from the handle we passed to jni -+ __ ld(obj_reg, Address(oop_handle_reg, 0)); ++ // Clear fp AND pc ++ __ reset_last_Java_frame(true); + -+ Label done; ++ // Pop self-frame. ++ __ leave(); // Epilog + -+ if (!UseHeavyMonitors) { -+ // Simple recursive lock? -+ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ __ beqz(t0, done); -+ } ++ // Jump to interpreter ++ __ ret(); + ++ // Make sure all code is generated ++ masm->flush(); + -+ // Must save x10 if if it is live now because cmpxchg must use it -+ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { -+ save_native_result(masm, ret_type, stack_slots); -+ } ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, ++ SimpleRuntimeFrame::framesize >> 1); ++} ++#endif // COMPILER2 + -+ if (!UseHeavyMonitors) { -+ // get address of the stack lock -+ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ // get old displaced header -+ __ ld(old_hdr, Address(x10, 0)); ++//------------------------------generate_handler_blob------ ++// ++// Generate a special Compile2Runtime blob that saves all registers, ++// and setup oopmap. ++// ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ OopMap* map = NULL; + -+ // Atomic swap old header if oop still contains the stack lock -+ Label succeed; -+ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); -+ __ bind(succeed); -+ } else { -+ __ j(slow_path_unlock); -+ } ++ // Allocate space for the code. Setup code generation tools. ++ CodeBuffer buffer("handler_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); + -+ // slow path re-enters here -+ __ bind(unlock_done); -+ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { -+ restore_native_result(masm, ret_type, stack_slots); -+ } ++ address start = __ pc(); ++ address call_pc = NULL; ++ int frame_size_in_words = -1; ++ bool cause_return = (poll_type == POLL_AT_RETURN); ++ RegisterSaver reg_saver; + -+ __ bind(done); -+ } ++ // Save Integer and Float registers. ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ Label dtrace_method_exit, dtrace_method_exit_done; -+ { -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); -+ __ lbu(t0, Address(t0, offset)); -+ __ bnez(t0, dtrace_method_exit); -+ __ bind(dtrace_method_exit_done); -+ } ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselves. 
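Editorial note on the comment above: the reason for hand-rolling the call rather than using call_VM is that the oop map must be registered against the exact return address of that call — the runtime later looks the map up by pc offset within the blob when it walks the stack. A rough stand-alone illustration of that bookkeeping; OopMapSketch, record_call_site and the std::map are inventions of this note, not HotSpot's OopMapSet API:

#include <cstdint>
#include <map>

struct OopMapSketch {
  // would describe which saved slots/registers hold oops; left empty in this sketch
};

// Keyed by (return_pc - blob_start), mirroring oop_maps->add_gc_map(__ pc() - start, map).
std::map<std::uintptr_t, OopMapSketch> gc_maps;

void record_call_site(std::uintptr_t blob_start, std::uintptr_t return_pc) {
  gc_maps.emplace(return_pc - blob_start, OopMapSketch{});
}

int main() {
  record_call_site(0x1000, 0x1040);          // dummy addresses; the map is found again by this exact offset
  return gc_maps.count(0x40) == 1 ? 0 : 1;   // lookup succeeds only at the recorded pc offset
}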
+ -+ __ reset_last_Java_frame(false); ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); + -+ // Unbox oop result, e.g. JNIHandles::resolve result. -+ if (is_reference_type(ret_type)) { -+ __ resolve_jobject(x10, xthread, t1); -+ } ++ // The return address must always be correct so that frame constructor never ++ // sees an invalid pc. + -+ if (CheckJNICalls) { -+ // clear_pending_jni_exception_check -+ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, x18 is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld(x18, Address(xthread, JavaThread::saved_exception_pc_offset())); ++ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); + } + -+ // reset handle block -+ __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); -+ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); -+ -+ __ leave(); ++ // Do the call ++ __ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(call_ptr), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); + -+ // Any exception pending? -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ bnez(t0, exception_pending); ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. + -+ // We're done -+ __ ret(); ++ oop_maps->add_gc_map( __ pc() - start, map); + -+ // Unexpected paths are out of line and go here ++ Label noException; + -+ // forward the exception -+ __ bind(exception_pending); ++ __ reset_last_Java_frame(false); + -+ // and forward the exception -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + -+ // Slow path locking & unlocking -+ if (method->is_synchronized()) { ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, noException); + -+ __ block_comment("Slow path lock {"); -+ __ bind(slow_path_lock); ++ // Exception pending + -+ // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM -+ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ reg_saver.restore_live_registers(masm); + -+ // protect the args we've loaded -+ save_args(masm, total_c_args, c_arg, out_regs); ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + -+ __ mv(c_rarg0, obj_reg); -+ __ mv(c_rarg1, lock_reg); -+ __ mv(c_rarg2, xthread); ++ // No exception case ++ __ bind(noException); + -+ // Not a leaf but we have last_Java_frame setup as we want -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); -+ restore_args(masm, total_c_args, c_arg, out_regs); ++ Label no_adjust, bail; ++ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); ++ __ bne(x18, t0, no_adjust); + +#ifdef ASSERT -+ { Label L; -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ beqz(t0, L); -+ __ stop("no pending exception allowed on exit from monitorenter"); -+ __ bind(L); -+ } ++ // Verify the correct encoding of the poll we're about to skip. ++ // See NativeInstruction::is_lwu_to_zr() ++ __ lwu(t0, Address(x18)); ++ __ andi(t1, t0, 0b0000011); ++ __ mv(t2, 0b0000011); ++ __ bne(t1, t2, bail); // 0-6:0b0000011 ++ __ srli(t1, t0, 7); ++ __ andi(t1, t1, 0b00000); ++ __ bnez(t1, bail); // 7-11:0b00000 ++ __ srli(t1, t0, 12); ++ __ andi(t1, t1, 0b110); ++ __ mv(t2, 0b110); ++ __ bne(t1, t2, bail); // 12-14:0b110 +#endif -+ __ j(lock_done); ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ add(x18, x18, NativeInstruction::instruction_size); ++ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); ++ } + -+ __ block_comment("} Slow path lock"); ++ __ bind(no_adjust); ++ // Normal exit, restore registers and exit. + -+ __ block_comment("Slow path unlock {"); -+ __ bind(slow_path_unlock); ++ reg_saver.restore_live_registers(masm); ++ __ ret(); + -+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { -+ save_native_result(masm, ret_type, stack_slots); -+ } ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif + -+ __ mv(c_rarg2, xthread); -+ __ la(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ __ mv(c_rarg0, obj_reg); ++ // Make sure all code is generated ++ masm->flush(); + -+ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) -+ // NOTE that obj_reg == x9 currently -+ __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} + -+ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. 
++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + -+#ifdef ASSERT -+ { -+ Label L; -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ beqz(t0, L); -+ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); -+ __ bind(L); -+ } -+#endif /* ASSERT */ ++ // allocate space for the code ++ ResourceMark rm; + -+ __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ CodeBuffer buffer(name, 1000, 512); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); + -+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { -+ restore_native_result(masm, ret_type, stack_slots); -+ } -+ __ j(unlock_done); ++ int frame_size_in_words = -1; ++ RegisterSaver reg_saver; + -+ __ block_comment("} Slow path unlock"); ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ OopMap* map = NULL; + -+ } // synchronized ++ int start = __ offset(); + -+ // SLOW PATH Reguard the stack if needed ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ __ bind(reguard); -+ save_native_result(masm, ret_type, stack_slots); -+ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); -+ restore_native_result(masm, ret_type, stack_slots); -+ // and continue -+ __ j(reguard_done); ++ int frame_complete = __ offset(); + -+ // SLOW PATH safepoint + { -+ __ block_comment("safepoint {"); -+ __ bind(safepoint_in_progress); ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); + -+ // Don't use call_VM as it will see a possible pending exception and forward it -+ // and never return here preventing us from clearing _last_native_pc down below. -+ // -+ save_native_result(masm, ret_type, stack_slots); + __ mv(c_rarg0, xthread); -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif + int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); ++ __ la_patchable(t0, RuntimeAddress(destination), offset); + __ jalr(x1, t0, offset); ++ __ bind(retaddr); ++ } + -+ // Restore any method result value -+ restore_native_result(masm, ret_type, stack_slots); ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. + -+ __ j(safepoint_in_progress_done); -+ __ block_comment("} safepoint"); -+ } ++ oop_maps->add_gc_map( __ offset() - start, map); + -+ // SLOW PATH dtrace support -+ { -+ __ block_comment("dtrace entry {"); -+ __ bind(dtrace_method_entry); ++ // x10 contains the address we are going to jump to assuming no exception got installed + -+ // We have all of the arguments setup at this point. We must not touch any register -+ // argument registers at this point (what if we save/restore them there are no oop? 
++ // clear last_Java_sp ++ __ reset_last_Java_frame(false); ++ // check for pending exceptions ++ Label pending; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ bnez(t0, pending); + -+ save_args(masm, total_c_args, c_arg, out_regs); -+ __ mov_metadata(c_rarg1, method()); -+ __ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), -+ xthread, c_rarg1); -+ restore_args(masm, total_c_args, c_arg, out_regs); -+ __ j(dtrace_method_entry_done); -+ __ block_comment("} dtrace entry"); -+ } ++ // get the returned Method* ++ __ get_vm_result_2(xmethod, xthread); ++ __ sd(xmethod, Address(sp, reg_saver.reg_offset_in_bytes(xmethod))); + -+ { -+ __ block_comment("dtrace exit {"); -+ __ bind(dtrace_method_exit); -+ save_native_result(masm, ret_type, stack_slots); -+ __ mov_metadata(c_rarg1, method()); -+ __ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), -+ xthread, c_rarg1); -+ restore_native_result(masm, ret_type, stack_slots); -+ __ j(dtrace_method_exit_done); -+ __ block_comment("} dtrace exit"); -+ } ++ // x10 is where we want to jump, overwrite t0 which is saved and temporary ++ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(t0))); ++ reg_saver.restore_live_registers(masm); + -+ __ flush(); ++ // We are back the the original state on entry and ready to go. + -+ nmethod *nm = nmethod::new_native_nmethod(method, -+ compile_id, -+ masm->code(), -+ vep_offset, -+ frame_complete, -+ stack_slots / VMRegImpl::slots_per_word, -+ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), -+ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), -+ oop_maps); -+ assert(nm != NULL, "create native nmethod fail!"); -+ return nm; -+} ++ __ jr(t0); + -+// this function returns the adjust size (in number of words) to a c2i adapter -+// activation for use during deoptimization -+int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { -+ assert(callee_locals >= callee_parameters, -+ "test and remove; got more parms than locals"); -+ if (callee_locals < callee_parameters) { -+ return 0; // No adjustment for negative locals -+ } -+ int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords; -+ // diff is counted in stack words -+ return align_up(diff, 2); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ reg_saver.restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ ++ __ sd(zr, Address(xthread, JavaThread::vm_result_offset())); ++ ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ ++ // return the blob ++ return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); +} + -+//------------------------------generate_deopt_blob---------------------------- -+void SharedRuntime::generate_deopt_blob() { ++#ifdef COMPILER2 ++//------------------------------generate_exception_blob--------------------------- ++// creates exception blob at the end ++// Using exception blob, this code is jumped from a compiled method. ++// (see emit_exception_handler in riscv.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. 
callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jmp. ++// ++// Arguments: ++// x10: exception oop ++// x13: exception pc ++// ++// Results: ++// x10: exception oop ++// x13: exception pc in caller ++// destination: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// Registers x10, x13, x12, x14, x15, t0 are not callee saved. ++// ++ ++void OptoRuntime::generate_exception_blob() { ++ assert(!OptoRuntime::is_callee_saved_register(R13_num), ""); ++ assert(!OptoRuntime::is_callee_saved_register(R10_num), ""); ++ assert(!OptoRuntime::is_callee_saved_register(R12_num), ""); ++ ++ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); ++ + // Allocate space for the code + ResourceMark rm; + // Setup code generation tools -+ int pad = 0; -+ CodeBuffer buffer("deopt_blob", 2048 + pad, 1024); ++ CodeBuffer buffer("exception_blob", 2048, 1024); + MacroAssembler* masm = new MacroAssembler(&buffer); -+ int frame_size_in_words = -1; -+ OopMap* map = NULL; -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(masm != NULL && oop_maps != NULL); -+ RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0); ++ assert_cond(masm != NULL); + -+ // ------------- -+ // This code enters when returning to a de-optimized nmethod. A return -+ // address has been pushed on the the stack, and return values are in -+ // registers. -+ // If we are doing a normal deopt then we were called from the patched -+ // nmethod from the point we returned to the nmethod. So the return -+ // address on the stack is wrong by NativeCall::instruction_size -+ // We will adjust the value so it looks like we have the original return -+ // address on the stack (like when we eagerly deoptimized). -+ // In the case of an exception pending when deoptimizing, we enter -+ // with a return address on the stack that points after the call we patched -+ // into the exception handler. We have the following register state from, -+ // e.g., the forward exception stub (see stubGenerator_riscv.cpp). -+ // x10: exception oop -+ // x9: exception handler -+ // x13: throwing pc -+ // So in this case we simply jam x13 into the useless return address and -+ // the stack looks just like we want. ++ // TODO check various assumptions made here + // -+ // At this point we need to de-opt. We save the argument return -+ // registers. We call the first C routine, fetch_unroll_info(). This -+ // routine captures the return values and returns a structure which -+ // describes the current frame size and the sizes of all replacement frames. -+ // The current frame is compiled code and may contain many inlined -+ // functions, each with their own JVM state. We pop the current frame, then -+ // push all the new frames. Then we call the C routine unpack_frames() to -+ // populate these frames. Finally unpack_frames() returns us the new target -+ // address. Notice that callee-save registers are BLOWN here; they have -+ // already been captured in the vframeArray at the time the return PC was -+ // patched. ++ // make sure we do so before running this ++ + address start = __ pc(); -+ Label cont; + -+ // Prolog for non exception case! 
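Editorial note before the hand-built prolog that follows: the exception pc arrives in ra and is stored where a return address would normally sit, so the stack walker treats the throwing frame as if it had made an ordinary call into this blob. A sketch of the two-word frame link created by "addi sp, sp, -2 * wordSize" plus the two stores; FrameLink and the dummy values are illustrative assumptions, not HotSpot structures:

#include <cstddef>
#include <cstdint>
#include <cstdio>

// What sits at the new sp after the prolog below.
struct FrameLink {
  std::uint64_t saved_fp;      // sd fp, 0(sp)
  std::uint64_t exception_pc;  // sd ra, wordSize(sp) -- ra holds the exception pc on entry
};

int main() {
  FrameLink link{0xCAFE0000u, 0xDEAD0000u};  // dummy values
  std::printf("fp slot at sp+%zu, exception pc (acting as return address) at sp+%zu, value 0x%llx\n",
              offsetof(FrameLink, saved_fp), offsetof(FrameLink, exception_pc),
              static_cast<unsigned long long>(link.exception_pc));
  return 0;
}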
++ // push fp and retaddr by hand ++ // Exception pc is 'return address' for stack walker ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(ra, Address(sp, wordSize)); ++ __ sd(fp, Address(sp)); ++ // there are no callee save registers and we don't expect an ++ // arg reg save area ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); + -+ // Save everything in sight. -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. ++ // ++ // address OptoRuntime::handle_exception_C(JavaThread* thread) ++ // ++ // n.b. 1 gp arg, 0 fp args, integral return type + -+ // Normal deoptimization. Save exec mode for unpack_frames. -+ __ mvw(xcpool, Deoptimization::Unpack_deopt); // callee-saved -+ __ j(cont); ++ // the stack should always be aligned ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, noreg, the_pc, t0); ++ __ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); ++ __ jalr(x1, t0, offset); + -+ int reexecute_offset = __ pc() - start; + -+ // Reexecute case -+ // return address is the pc describes what bci to do re-execute at ++ // handle_exception_C is a special VM call which does not require an explicit ++ // instruction sync afterwards. + -+ // No need to update map as each call to save_live_registers will produce identical oopmap -+ (void) reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ // Set an oopmap for the call site. This oopmap will only be used if we ++ // are unwinding the stack. Hence, all locations will be dead. ++ // Callee-saved registers will be the same as the frame above (i.e., ++ // handle_exception_stub), since they were restored when we got the ++ // exception. + -+ __ mvw(xcpool, Deoptimization::Unpack_reexecute); // callee-saved -+ __ j(cont); ++ OopMapSet* oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); + -+ int exception_offset = __ pc() - start; ++ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); + -+ // Prolog for exception case ++ __ reset_last_Java_frame(false); + -+ // all registers are dead at this entry point, except for x10, and -+ // x13 which contain the exception oop and exception pc -+ // respectively. Set them in TLS and fall thru to the -+ // unpack_with_exception_in_tls entry point. ++ // Restore callee-saved registers + -+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ // fp is an implicitly saved callee saved register (i.e. the calling ++ // convention will save restore it in prolog/epilog) Other than that ++ // there are no callee save registers now that adapter frames are gone. 
++ // and we dont' expect an arg reg save area ++ __ ld(fp, Address(sp)); ++ __ ld(x13, Address(sp, wordSize)); ++ __ addi(sp, sp , 2 * wordSize); + -+ int exception_in_tls_offset = __ pc() - start; -+ -+ // new implementation because exception oop is now passed in JavaThread ++ // x10: exception handler + -+ // Prolog for exception case -+ // All registers must be preserved because they might be used by LinearScan -+ // Exceptiop oop and throwing PC are passed in JavaThread -+ // tos: stack at point of call to method that threw the exception (i.e. only -+ // args are on the stack, no return address) ++ // We have a handler in x10 (could be deopt blob). ++ __ mv(t0, x10); + -+ // The return address pushed by save_live_registers will be patched -+ // later with the throwing pc. The correct value is not available -+ // now because loading it from memory would destroy registers. ++ // Get the exception oop ++ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld(x14, Address(xthread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ sd(zr, Address(xthread, JavaThread::exception_handler_pc_offset())); ++ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. ++ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); + -+ // NB: The SP at this point must be the SP of the method that is -+ // being deoptimized. Deoptimization assumes that the frame created -+ // here by save_live_registers is immediately below the method's SP. -+ // This is a somewhat fragile mechanism. ++ // x10: exception oop ++ // t0: exception handler ++ // x14: exception pc ++ // Jump to handler + -+ // Save everything in sight. -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ __ jr(t0); + -+ // Now it is safe to overwrite any register ++ // Make sure all code is generated ++ masm->flush(); + -+ // Deopt during an exception. Save exec mode for unpack_frames. -+ __ li(xcpool, Deoptimization::Unpack_exception); // callee-saved ++ // Set exception blob ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); ++} ++#endif // COMPILER2 +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +new file mode 100644 +index 0000000000..9970229c5c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -0,0 +1,3743 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // load throwing pc from JavaThread and patch it as the return address -+ // of the current frame. Then clear the field in JavaThread ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "utilities/align.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++#if INCLUDE_ZGC ++#include "gc/z/zThreadLocalData.hpp" ++#endif + -+ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ __ sd(x13, Address(fp, frame::return_addr_offset * wordSize)); -+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp + -+#ifdef ASSERT -+ // verify that there is really an exception oop in JavaThread -+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ verify_oop(x10); ++#undef __ ++#define __ _masm-> + -+ // verify that there is no pending exception -+ Label no_pending_exception; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, no_pending_exception); -+ __ stop("must not have pending exception here"); -+ __ bind(no_pending_exception); ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + -+ __ bind(cont); ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + -+ // Call C code. Need thread and this frame, but NOT official VM entry -+ // crud. We cannot block on this call, no GC can happen. -+ // -+ // UnrollBlock* fetch_unroll_info(JavaThread* thread) ++// Stub Code definitions + -+ // fetch_unroll_info needs to call last_java_frame(). 
++class StubGenerator: public StubCodeGenerator { ++ private: + -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); -+#ifdef ASSERT -+ { -+ Label L; -+ __ ld(t0, Address(xthread, -+ JavaThread::last_Java_fp_offset())); -+ __ beqz(t0, L); -+ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); -+ __ bind(L); ++#ifdef PRODUCT ++#define inc_counter_np(counter) ((void)0) ++#else ++ void inc_counter_np_(int& counter) { ++ __ la(t1, ExternalAddress((address)&counter)); ++ __ lwu(t0, Address(t1, 0)); ++ __ addiw(t0, t0, 1); ++ __ sw(t0, Address(t1, 0)); + } -+#endif // ASSERT -+ __ mv(c_rarg0, xthread); -+ __ mv(c_rarg1, xcpool); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); ++#define inc_counter_np(counter) \ ++ BLOCK_COMMENT("inc_counter " #counter); \ ++ inc_counter_np_(counter); ++#endif + -+ // Need to have an oopmap that tells fetch_unroll_info where to -+ // find any register it might need. -+ oop_maps->add_gc_map(__ pc() - start, map); ++ // Call stubs are used to call Java from C ++ // ++ // Arguments: ++ // c_rarg0: call wrapper address address ++ // c_rarg1: result address ++ // c_rarg2: result type BasicType ++ // c_rarg3: method Method* ++ // c_rarg4: (interpreter) entry point address ++ // c_rarg5: parameters intptr_t* ++ // c_rarg6: parameter size (in words) int ++ // c_rarg7: thread Thread* ++ // ++ // There is no return from the stub itself as any Java result ++ // is written to result ++ // ++ // we save x1 (ra) as the return PC at the base of the frame and ++ // link x8 (fp) below it as the frame pointer installing sp (x2) ++ // into fp. ++ // ++ // we save x10-x17, which accounts for all the c arguments. ++ // ++ // TODO: strictly do we need to save them all? they are treated as ++ // volatile by C so could we omit saving the ones we are going to ++ // place in global registers (thread? method?) or those we only use ++ // during setup of the Java call? ++ // ++ // we don't need to save x5 which C uses as an indirect result location ++ // return register. ++ // ++ // we don't need to save x6-x7 and x28-x31 which both C and Java treat as ++ // volatile ++ // ++ // we save x9, x18-x27, f8-f9, and f18-f27 which Java uses as temporary ++ // registers and C expects to be callee-save ++ // ++ // so the stub frame looks like this when we enter Java code ++ // ++ // [ return_from_Java ] <--- sp ++ // [ argument word n ] ++ // ... 
++ // -34 [ argument word 1 ] ++ // -33 [ saved f27 ] <--- sp_after_call ++ // -32 [ saved f26 ] ++ // -31 [ saved f25 ] ++ // -30 [ saved f24 ] ++ // -29 [ saved f23 ] ++ // -28 [ saved f22 ] ++ // -27 [ saved f21 ] ++ // -26 [ saved f20 ] ++ // -25 [ saved f19 ] ++ // -24 [ saved f18 ] ++ // -23 [ saved f9 ] ++ // -22 [ saved f8 ] ++ // -21 [ saved x27 ] ++ // -20 [ saved x26 ] ++ // -19 [ saved x25 ] ++ // -18 [ saved x24 ] ++ // -17 [ saved x23 ] ++ // -16 [ saved x22 ] ++ // -15 [ saved x21 ] ++ // -14 [ saved x20 ] ++ // -13 [ saved x19 ] ++ // -12 [ saved x18 ] ++ // -11 [ saved x9 ] ++ // -10 [ call wrapper (x10) ] ++ // -9 [ result (x11) ] ++ // -8 [ result type (x12) ] ++ // -7 [ method (x13) ] ++ // -6 [ entry point (x14) ] ++ // -5 [ parameters (x15) ] ++ // -4 [ parameter size (x16) ] ++ // -3 [ thread (x17) ] ++ // -2 [ saved fp (x8) ] ++ // -1 [ saved ra (x1) ] ++ // 0 [ ] <--- fp == saved sp (x2) + -+ __ reset_last_Java_frame(false); ++ // Call stub stack layout word offsets from fp ++ enum call_stub_layout { ++ sp_after_call_off = -33, + -+ // Load UnrollBlock* into x15 -+ __ mv(x15, x10); ++ f27_off = -33, ++ f26_off = -32, ++ f25_off = -31, ++ f24_off = -30, ++ f23_off = -29, ++ f22_off = -28, ++ f21_off = -27, ++ f20_off = -26, ++ f19_off = -25, ++ f18_off = -24, ++ f9_off = -23, ++ f8_off = -22, + -+ __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); -+ Label noException; -+ __ li(t0, Deoptimization::Unpack_exception); -+ __ bne(xcpool, t0, noException); // Was exception pending? -+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); -+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); ++ x27_off = -21, ++ x26_off = -20, ++ x25_off = -19, ++ x24_off = -18, ++ x23_off = -17, ++ x22_off = -16, ++ x21_off = -15, ++ x20_off = -14, ++ x19_off = -13, ++ x18_off = -12, ++ x9_off = -11, + -+ __ verify_oop(x10); ++ call_wrapper_off = -10, ++ result_off = -9, ++ result_type_off = -8, ++ method_off = -7, ++ entry_point_off = -6, ++ parameters_off = -5, ++ parameter_size_off = -4, ++ thread_off = -3, ++ fp_f = -2, ++ retaddr_off = -1, ++ }; + -+ // Overwrite the result registers with the exception results. -+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ address generate_call_stub(address& return_address) { ++ assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 && ++ (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, ++ "adjust this code"); + -+ __ bind(noException); ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); + -+ // Only register save data is on the stack. -+ // Now restore the result registers. Everything else is either dead -+ // or captured in the vframeArray. 
++ const Address sp_after_call (fp, sp_after_call_off * wordSize); + -+ // Restore fp result register -+ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); -+ // Restore integer result register -+ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ const Address call_wrapper (fp, call_wrapper_off * wordSize); ++ const Address result (fp, result_off * wordSize); ++ const Address result_type (fp, result_type_off * wordSize); ++ const Address method (fp, method_off * wordSize); ++ const Address entry_point (fp, entry_point_off * wordSize); ++ const Address parameters (fp, parameters_off * wordSize); ++ const Address parameter_size(fp, parameter_size_off * wordSize); + -+ // Pop all of the register save area off the stack -+ __ add(sp, sp, frame_size_in_words * wordSize); ++ const Address thread (fp, thread_off * wordSize); + -+ // All of the register save area has been popped of the stack. Only the -+ // return address remains. ++ const Address f27_save (fp, f27_off * wordSize); ++ const Address f26_save (fp, f26_off * wordSize); ++ const Address f25_save (fp, f25_off * wordSize); ++ const Address f24_save (fp, f24_off * wordSize); ++ const Address f23_save (fp, f23_off * wordSize); ++ const Address f22_save (fp, f22_off * wordSize); ++ const Address f21_save (fp, f21_off * wordSize); ++ const Address f20_save (fp, f20_off * wordSize); ++ const Address f19_save (fp, f19_off * wordSize); ++ const Address f18_save (fp, f18_off * wordSize); ++ const Address f9_save (fp, f9_off * wordSize); ++ const Address f8_save (fp, f8_off * wordSize); + -+ // Pop all the frames we must move/replace. -+ // -+ // Frame picture (youngest to oldest) -+ // 1: self-frame (no frame link) -+ // 2: deopting frame (no frame link) -+ // 3: caller of deopting frame (could be compiled/interpreted). -+ // -+ // Note: by leaving the return address of self-frame on the stack -+ // and using the size of frame 2 to adjust the stack -+ // when we are done the return to frame 3 will still be on the stack. ++ const Address x27_save (fp, x27_off * wordSize); ++ const Address x26_save (fp, x26_off * wordSize); ++ const Address x25_save (fp, x25_off * wordSize); ++ const Address x24_save (fp, x24_off * wordSize); ++ const Address x23_save (fp, x23_off * wordSize); ++ const Address x22_save (fp, x22_off * wordSize); ++ const Address x21_save (fp, x21_off * wordSize); ++ const Address x20_save (fp, x20_off * wordSize); ++ const Address x19_save (fp, x19_off * wordSize); ++ const Address x18_save (fp, x18_off * wordSize); + -+ // Pop deoptimized frame -+ __ lwu(x12, Address(x15, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); -+ __ sub(x12, x12, 2 * wordSize); -+ __ add(sp, sp, x12); -+ __ ld(fp, Address(sp, 0)); -+ __ ld(ra, Address(sp, wordSize)); -+ __ addi(sp, sp, 2 * wordSize); -+ // RA should now be the return address to the caller (3) ++ const Address x9_save (fp, x9_off * wordSize); + -+#ifdef ASSERT -+ // Compilers generate code that bang the stack by as much as the -+ // interpreter would need. So this stack banging should never -+ // trigger a fault. Verify that it does not on non product builds. 
-+ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); -+ __ bang_stack_size(x9, x12); -+#endif -+ // Load address of array of frame pcs into x12 -+ __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); ++ // stub code + -+ // Load address of array of frame sizes into x14 -+ __ ld(x14, Address(x15, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); ++ address riscv_entry = __ pc(); + -+ // Load counter into x13 -+ __ lwu(x13, Address(x15, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); ++ // set up frame and move sp to end of save area ++ __ enter(); ++ __ addi(sp, fp, sp_after_call_off * wordSize); + -+ // Now adjust the caller's stack to make up for the extra locals -+ // but record the original sp so that we can save it in the skeletal interpreter -+ // frame and the stack walking of interpreter_sender will get the unextended sp -+ // value and not the "real" sp value. ++ // save register parameters and Java temporary/global registers ++ // n.b. we save thread even though it gets installed in ++ // xthread because we want to sanity check tp later ++ __ sd(c_rarg7, thread); ++ __ sw(c_rarg6, parameter_size); ++ __ sd(c_rarg5, parameters); ++ __ sd(c_rarg4, entry_point); ++ __ sd(c_rarg3, method); ++ __ sd(c_rarg2, result_type); ++ __ sd(c_rarg1, result); ++ __ sd(c_rarg0, call_wrapper); + -+ const Register sender_sp = x16; ++ __ sd(x9, x9_save); + -+ __ mv(sender_sp, sp); -+ __ lwu(x9, Address(x15, -+ Deoptimization::UnrollBlock:: -+ caller_adjustment_offset_in_bytes())); -+ __ sub(sp, sp, x9); ++ __ sd(x18, x18_save); ++ __ sd(x19, x19_save); ++ __ sd(x20, x20_save); ++ __ sd(x21, x21_save); ++ __ sd(x22, x22_save); ++ __ sd(x23, x23_save); ++ __ sd(x24, x24_save); ++ __ sd(x25, x25_save); ++ __ sd(x26, x26_save); ++ __ sd(x27, x27_save); + -+ // Push interpreter frames in a loop -+ __ li(t0, 0xDEADDEAD); // Make a recognizable pattern -+ __ mv(t1, t0); -+ Label loop; -+ __ bind(loop); -+ __ ld(x9, Address(x14, 0)); // Load frame size -+ __ addi(x14, x14, wordSize); -+ __ sub(x9, x9, 2 * wordSize); // We'll push pc and fp by hand -+ __ ld(ra, Address(x12, 0)); // Load pc -+ __ addi(x12, x12, wordSize); -+ __ enter(); // Save old & set new fp -+ __ sub(sp, sp, x9); // Prolog -+ // This value is corrected by layout_activation_impl -+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable -+ __ mv(sender_sp, sp); // Pass sender_sp to next frame -+ __ addi(x13, x13, -1); // Decrement counter -+ __ bnez(x13, loop); ++ __ fsd(f8, f8_save); ++ __ fsd(f9, f9_save); ++ __ fsd(f18, f18_save); ++ __ fsd(f19, f19_save); ++ __ fsd(f20, f20_save); ++ __ fsd(f21, f21_save); ++ __ fsd(f22, f22_save); ++ __ fsd(f23, f23_save); ++ __ fsd(f24, f24_save); ++ __ fsd(f25, f25_save); ++ __ fsd(f26, f26_save); ++ __ fsd(f27, f27_save); + -+ // Re-push self-frame -+ __ ld(ra, Address(x12)); -+ __ enter(); ++ // install Java thread in global register now we have saved ++ // whatever value it held ++ __ mv(xthread, c_rarg7); + -+ // Allocate a full sized register save area. 
We subtract 2 because -+ // enter() just pushed 2 words -+ __ sub(sp, sp, (frame_size_in_words - 2) * wordSize); ++ // And method ++ __ mv(xmethod, c_rarg3); + -+ // Restore frame locals after moving the frame -+ __ fsd(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); -+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ // set up the heapbase register ++ __ reinit_heapbase(); + -+ // Call C code. Need thread but NOT official VM entry -+ // crud. We cannot block on this call, no GC can happen. Call should -+ // restore return values to their stack-slots with the new SP. -+ // -+ // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ beqz(t0, L); ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ BIND(L); ++ } ++#endif ++ // pass parameters if any ++ __ mv(esp, sp); ++ __ slli(t0, c_rarg6, LogBytesPerWord); ++ __ sub(t0, sp, t0); // Move SP out of the way ++ __ andi(sp, t0, -2 * wordSize); + -+ // Use fp because the frames look interpreted now -+ // Don't need the precise return PC here, just precise enough to point into this code blob. -+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ BLOCK_COMMENT("pass parameters if any"); ++ Label parameters_done; ++ // parameter count is still in c_rarg6 ++ // and parameter pointer identifying param 1 is in c_rarg5 ++ __ beqz(c_rarg6, parameters_done); + -+ __ mv(c_rarg0, xthread); -+ __ mv(c_rarg1, xcpool); // second arg: exec_mode -+ offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); -+ __ jalr(x1, t0, offset); ++ address loop = __ pc(); ++ __ ld(t0, c_rarg5, 0); ++ __ addi(c_rarg5, c_rarg5, wordSize); ++ __ addi(c_rarg6, c_rarg6, -1); ++ __ push_reg(t0); ++ __ bgtz(c_rarg6, loop); + -+ // Set an oopmap for the call site -+ // Use the same PC we used for the last java frame -+ oop_maps->add_gc_map(the_pc - start, -+ new OopMap(frame_size_in_words, 0)); ++ __ BIND(parameters_done); + -+ // Clear fp AND pc -+ __ reset_last_Java_frame(true); ++ // call Java entry -- passing methdoOop, and current sp ++ // xmethod: Method* ++ // x30: sender sp ++ BLOCK_COMMENT("call Java function"); ++ __ mv(x30, sp); ++ __ jalr(c_rarg4); + -+ // Collect return values -+ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); -+ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ // save current address for use by exception handling code + -+ // Pop self-frame. -+ __ leave(); // Epilog -+ -+ // Jump to interpreter -+ __ ret(); -+ -+ // Make sure all code is generated -+ masm->flush(); -+ -+ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); -+ assert(_deopt_blob != NULL, "create deoptimization blob fail!"); -+ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); -+} -+ -+// Number of stack slots between incoming argument block and the start of -+// a new frame. The PROLOG must add this many slots to the stack. The -+// EPILOG must remove this many slots. -+// RISCV needs two words for RA (return address) and FP (frame pointer). 
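The parameter set-up above reserves one word per Java argument below sp and then rounds the result down with andi(sp, t0, -2 * wordSize), because the RISC-V calling convention keeps sp 16-byte aligned. A small standalone sketch of that arithmetic (the starting sp and word count are made-up values, not taken from the patch):

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t wordSize = 8;
  uint64_t sp = 0x3ffffff6a0;   // hypothetical current stack pointer
  uint64_t nwords = 5;          // hypothetical parameter count (c_rarg6)

  // slli by LogBytesPerWord, sub from sp, then andi(sp, t0, -2 * wordSize)
  uint64_t t0 = sp - (nwords << 3);            // reserve nwords words
  uint64_t aligned = t0 & ~(2 * wordSize - 1); // & -16 keeps sp 16-byte aligned

  std::printf("reserved sp = 0x%llx, aligned sp = 0x%llx\n",
              (unsigned long long)t0, (unsigned long long)aligned);
  return 0;
}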
-+uint SharedRuntime::in_preserve_stack_slots() { -+ return 2 * VMRegImpl::slots_per_word; -+} ++ return_address = __ pc(); + -+uint SharedRuntime::out_preserve_stack_slots() { -+ return 0; -+} ++ // store result depending on type (everything that is not ++ // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ // n.b. this assumes Java returns an integral result in x10 ++ // and a floating result in j_farg0 ++ __ ld(j_rarg2, result); ++ Label is_long, is_float, is_double, exit; ++ __ ld(j_rarg1, result_type); ++ __ li(t0, (u1)T_OBJECT); ++ __ beq(j_rarg1, t0, is_long); ++ __ li(t0, (u1)T_LONG); ++ __ beq(j_rarg1, t0, is_long); ++ __ li(t0, (u1)T_FLOAT); ++ __ beq(j_rarg1, t0, is_float); ++ __ li(t0, (u1)T_DOUBLE); ++ __ beq(j_rarg1, t0, is_double); + -+#ifdef COMPILER2 -+//------------------------------generate_uncommon_trap_blob-------------------- -+void SharedRuntime::generate_uncommon_trap_blob() { -+ // Allocate space for the code -+ ResourceMark rm; -+ // Setup code generation tools -+ CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++ // handle T_INT case ++ __ sw(x10, Address(j_rarg2)); + -+ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); ++ __ BIND(exit); + -+ address start = __ pc(); ++ // pop parameters ++ __ addi(esp, fp, sp_after_call_off * wordSize); + -+ // Push self-frame. We get here with a return address in RA -+ // and sp should be 16 byte aligned -+ // push fp and retaddr by hand -+ __ addi(sp, sp, -2 * wordSize); -+ __ sd(ra, Address(sp, wordSize)); -+ __ sd(fp, Address(sp, 0)); -+ // we don't expect an arg reg save area -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#ifdef ASSERT ++ // verify that threads correspond ++ { ++ Label L, S; ++ __ ld(t0, thread); ++ __ bne(xthread, t0, S); ++ __ get_thread(t0); ++ __ beq(xthread, t0, L); ++ __ BIND(S); ++ __ stop("StubRoutines::call_stub: threads must correspond"); ++ __ BIND(L); ++ } +#endif -+ // compiler left unloaded_class_index in j_rarg0 move to where the -+ // runtime expects it. -+ __ addiw(c_rarg1, j_rarg0, 0); + -+ // we need to set the past SP to the stack pointer of the stub frame -+ // and the pc to the address where this runtime call will return -+ // although actually any pc in this code blob will do). -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ // restore callee-save registers ++ __ fld(f27, f27_save); ++ __ fld(f26, f26_save); ++ __ fld(f25, f25_save); ++ __ fld(f24, f24_save); ++ __ fld(f23, f23_save); ++ __ fld(f22, f22_save); ++ __ fld(f21, f21_save); ++ __ fld(f20, f20_save); ++ __ fld(f19, f19_save); ++ __ fld(f18, f18_save); ++ __ fld(f9, f9_save); ++ __ fld(f8, f8_save); + -+ // Call C code. Need thread but NOT official VM entry -+ // crud. We cannot block on this call, no GC can happen. Call should -+ // capture callee-saved registers as well as return values. -+ // -+ // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode) -+ // -+ // n.b. 
3 gp args, 0 fp args, integral return type ++ __ ld(x27, x27_save); ++ __ ld(x26, x26_save); ++ __ ld(x25, x25_save); ++ __ ld(x24, x24_save); ++ __ ld(x23, x23_save); ++ __ ld(x22, x22_save); ++ __ ld(x21, x21_save); ++ __ ld(x20, x20_save); ++ __ ld(x19, x19_save); ++ __ ld(x18, x18_save); + -+ __ mv(c_rarg0, xthread); -+ __ mvw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap); -+ int32_t offset = 0; -+ __ la_patchable(t0, -+ RuntimeAddress(CAST_FROM_FN_PTR(address, -+ Deoptimization::uncommon_trap)), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); ++ __ ld(x9, x9_save); + -+ // Set an oopmap for the call site -+ OopMapSet* oop_maps = new OopMapSet(); -+ OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); -+ assert_cond(oop_maps != NULL && map != NULL); ++ __ ld(c_rarg0, call_wrapper); ++ __ ld(c_rarg1, result); ++ __ ld(c_rarg2, result_type); ++ __ ld(c_rarg3, method); ++ __ ld(c_rarg4, entry_point); ++ __ ld(c_rarg5, parameters); ++ __ ld(c_rarg6, parameter_size); ++ __ ld(c_rarg7, thread); + -+ // location of fp is known implicitly by the frame sender code ++ // leave frame and return to caller ++ __ leave(); ++ __ ret(); + -+ oop_maps->add_gc_map(__ pc() - start, map); ++ // handle return types different from T_INT + -+ __ reset_last_Java_frame(false); ++ __ BIND(is_long); ++ __ sd(x10, Address(j_rarg2, 0)); ++ __ j(exit); + -+ // move UnrollBlock* into x14 -+ __ mv(x14, x10); ++ __ BIND(is_float); ++ __ fsw(j_farg0, Address(j_rarg2, 0), t0); ++ __ j(exit); + -+#ifdef ASSERT -+ { Label L; -+ __ lwu(t0, Address(x14, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); -+ __ mvw(t1, Deoptimization::Unpack_uncommon_trap); -+ __ beq(t0, t1, L); -+ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); -+ __ bind(L); ++ __ BIND(is_double); ++ __ fsd(j_farg0, Address(j_rarg2, 0), t0); ++ __ j(exit); ++ ++ return start; + } -+#endif + -+ // Pop all the frames we must move/replace. ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. + // -+ // Frame picture (youngest to oldest) -+ // 1: self-frame (no frame link) -+ // 2: deopting frame (no frame link) -+ // 3: caller of deopting frame (could be compiled/interpreted). ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // x10: exception oop + -+ __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog! ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); + -+ // Pop deoptimized frame (int) -+ __ lwu(x12, Address(x14, -+ Deoptimization::UnrollBlock:: -+ size_of_deoptimized_frame_offset_in_bytes())); -+ __ sub(x12, x12, 2 * wordSize); -+ __ add(sp, sp, x12); -+ __ ld(fp, sp, 0); -+ __ ld(ra, sp, wordSize); -+ __ addi(sp, sp, 2 * wordSize); -+ // RA should now be the return address to the caller (3) frame ++ // same as in generate_call_stub(): ++ const Address thread(fp, thread_off * wordSize); + +#ifdef ASSERT -+ // Compilers generate code that bang the stack by as much as the -+ // interpreter would need. So this stack banging should never -+ // trigger a fault. Verify that it does not on non product builds. 
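The epilogue of the call stub above stores the Java result according to result_type: T_OBJECT and T_LONG are written back as full 64-bit values, T_FLOAT and T_DOUBLE come from the FP result register, and every other type is narrowed to a 32-bit int. A rough standalone illustration of that dispatch (FakeBasicType and its values are placeholders, not HotSpot's BasicType encoding):

#include <cstdint>
#include <cstring>
#include <cstdio>

enum FakeBasicType { FT_INT, FT_LONG, FT_OBJECT, FT_FLOAT, FT_DOUBLE }; // placeholders

// Store the raw return values into the caller-supplied result slot,
// mirroring the is_long / is_float / is_double branches above.
void store_result(void* result, FakeBasicType type, uint64_t int_ret, double fp_ret) {
  switch (type) {
    case FT_OBJECT:
    case FT_LONG: {                       // full 64-bit store (sd x10)
      std::memcpy(result, &int_ret, 8);
      break;
    }
    case FT_FLOAT: {                      // fsw j_farg0
      float f = (float)fp_ret;
      std::memcpy(result, &f, 4);
      break;
    }
    case FT_DOUBLE: {                     // fsd j_farg0
      std::memcpy(result, &fp_ret, 8);
      break;
    }
    default: {                            // everything else treated as T_INT (sw x10)
      uint32_t i = (uint32_t)int_ret;
      std::memcpy(result, &i, 4);
      break;
    }
  }
}

int main() {
  uint64_t slot = 0;
  store_result(&slot, FT_LONG, 0x1122334455667788ull, 0.0);
  std::printf("long result slot: 0x%llx\n", (unsigned long long)slot);
  return 0;
}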
-+ __ lwu(x11, Address(x14, -+ Deoptimization::UnrollBlock:: -+ total_frame_sizes_offset_in_bytes())); -+ __ bang_stack_size(x11, x12); ++ // verify that threads correspond ++ { ++ Label L, S; ++ __ ld(t0, thread); ++ __ bne(xthread, t0, S); ++ __ get_thread(t0); ++ __ beq(xthread, t0, L); ++ __ bind(S); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } +#endif + -+ // Load address of array of frame pcs into x12 (address*) -+ __ ld(x12, Address(x14, -+ Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); -+ -+ // Load address of array of frame sizes into x15 (intptr_t*) -+ __ ld(x15, Address(x14, -+ Deoptimization::UnrollBlock:: -+ frame_sizes_offset_in_bytes())); -+ -+ // Counter -+ __ lwu(x13, Address(x14, -+ Deoptimization::UnrollBlock:: -+ number_of_frames_offset_in_bytes())); // (int) -+ -+ // Now adjust the caller's stack to make up for the extra locals but -+ // record the original sp so that we can save it in the skeletal -+ // interpreter frame and the stack walking of interpreter_sender -+ // will get the unextended sp value and not the "real" sp value. -+ -+ const Register sender_sp = t1; // temporary register ++ // set pending exception ++ __ verify_oop(x10); + -+ __ lwu(x11, Address(x14, -+ Deoptimization::UnrollBlock:: -+ caller_adjustment_offset_in_bytes())); // (int) -+ __ mv(sender_sp, sp); -+ __ sub(sp, sp, x11); ++ __ sd(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ mv(t0, (address)__FILE__); ++ __ sd(t0, Address(xthread, Thread::exception_file_offset())); ++ __ mv(t0, (int)__LINE__); ++ __ sw(t0, Address(xthread, Thread::exception_line_offset())); + -+ // Push interpreter frames in a loop -+ Label loop; -+ __ bind(loop); -+ __ ld(x11, Address(x15, 0)); // Load frame size -+ __ sub(x11, x11, 2 * wordSize); // We'll push pc and fp by hand -+ __ ld(ra, Address(x12, 0)); // Save return address -+ __ enter(); // and old fp & set new fp -+ __ sub(sp, sp, x11); // Prolog -+ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable -+ // This value is corrected by layout_activation_impl -+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ __ mv(sender_sp, sp); // Pass sender_sp to next frame -+ __ add(x15, x15, wordSize); // Bump array pointer (sizes) -+ __ add(x12, x12, wordSize); // Bump array pointer (pcs) -+ __ subw(x13, x13, 1); // Decrement counter -+ __ bgtz(x13, loop); -+ __ ld(ra, Address(x12, 0)); // save final return address -+ // Re-push self-frame -+ __ enter(); // & old fp & set new fp ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, ++ "_call_stub_return_address must have been generated before"); ++ __ j(StubRoutines::_call_stub_return_address); + -+ // Use fp because the frames look interpreted now -+ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. -+ // Don't need the precise return PC here, just precise enough to point into this code blob. -+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ return start; ++ } + -+ // Call C code. Need thread but NOT official VM entry -+ // crud. We cannot block on this call, no GC can happen. Call should -+ // restore return values to their stack-slots with the new SP. ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. 
The pending exception in Thread is ++ // converted into a Java-level exception. + // -+ // BasicType unpack_frames(JavaThread* thread, int exec_mode) ++ // Contract with Java-level exception handlers: ++ // x10: exception ++ // x13: throwing pc + // ++ // NOTE: At entry of this stub, exception-pc must be in RA !! + -+ // n.b. 2 gp args, 0 fp args, integral return type -+ -+ // sp should already be aligned -+ __ mv(c_rarg0, xthread); -+ __ mvw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap); -+ offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); -+ __ jalr(x1, t0, offset); ++ // NOTE: this is always used as a jump target within generated code ++ // so it just needs to be generated code with no x86 prolog + -+ // Set an oopmap for the call site -+ // Use the same PC we used for the last java frame -+ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ address start = __ pc(); + -+ // Clear fp AND pc -+ __ reset_last_Java_frame(true); ++ // Upon entry, RA points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // becomes the throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. + -+ // Pop self-frame. -+ __ leave(); // Epilog ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ bnez(t0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif + -+ // Jump to interpreter -+ __ ret(); ++ // compute exception handler into x9 + -+ // Make sure all code is generated -+ masm->flush(); ++ // call the VM to find the handler address associated with the ++ // caller address. pass thread in x10 and caller pc (ret address) ++ // in x11. n.b. the caller pc is in ra, unlike x86 where it is on ++ // the stack. ++ __ mv(c_rarg1, ra); ++ // ra will be trashed by the VM call so we move it to x9 ++ // (callee-saved) because we also need to pass it to the handler ++ // returned by this call. ++ __ mv(x9, ra); ++ BLOCK_COMMENT("call exception_handler_for_return_address"); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ++ SharedRuntime::exception_handler_for_return_address), ++ xthread, c_rarg1); ++ // we should not really care that ra is no longer the callee ++ // address. we saved the value the handler needs in x9 so we can ++ // just copy it to x13. however, the C2 handler will push its own ++ // frame and then calls into the VM and the VM code asserts that ++ // the PC for the frame above the handler belongs to a compiled ++ // Java method. So, we restore ra here to satisfy that assert. 
++ __ mv(ra, x9); ++ // setup x10 & x13 & clear pending exception ++ __ mv(x13, x9); ++ __ mv(x9, x10); ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); + -+ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, -+ SimpleRuntimeFrame::framesize >> 1); -+} -+#endif // COMPILER2 ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bnez(x10, L); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif + -+//------------------------------generate_handler_blob------ -+// -+// Generate a special Compile2Runtime blob that saves all registers, -+// and setup oopmap. -+// -+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { -+ ResourceMark rm; -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); -+ OopMap* map = NULL; ++ // continue at exception handler ++ // x10: exception ++ // x13: throwing pc ++ // x9: exception handler ++ __ verify_oop(x10); ++ __ jr(x9); + -+ // Allocate space for the code. Setup code generation tools. -+ CodeBuffer buffer("handler_blob", 2048, 1024); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++ return start; ++ } + -+ address start = __ pc(); -+ address call_pc = NULL; -+ int frame_size_in_words = -1; -+ bool cause_return = (poll_type == POLL_AT_RETURN); -+ RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++ // Non-destructive plausibility checks for oops ++ // ++ // Arguments: ++ // x10: oop to verify ++ // t0: error message ++ // ++ // Stack after saving c_rarg3: ++ // [tos + 0]: saved c_rarg3 ++ // [tos + 1]: saved c_rarg2 ++ // [tos + 2]: saved ra ++ // [tos + 3]: saved t1 ++ // [tos + 4]: saved x10 ++ // [tos + 5]: saved t0 ++ address generate_verify_oop() { + -+ // Save Integer and Float registers. -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); + -+ // The following is basically a call_VM. However, we need the precise -+ // address of the call in order to generate an oopmap. Hence, we do all the -+ // work outselves. ++ Label exit, error; + -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ __ push_reg(0x3000, sp); // save c_rarg2 and c_rarg3 + -+ // The return address must always be correct so that frame constructor never -+ // sees an invalid pc. ++ __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr())); ++ __ ld(c_rarg3, Address(c_rarg2)); ++ __ add(c_rarg3, c_rarg3, 1); ++ __ sd(c_rarg3, Address(c_rarg2)); + -+ if (!cause_return) { -+ // overwrite the return address pushed by save_live_registers -+ // Additionally, x18 is a callee-saved register so we can look at -+ // it later to determine if someone changed the return address for -+ // us! 
-+ __ ld(x18, Address(xthread, JavaThread::saved_exception_pc_offset())); -+ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); -+ } ++ // object is in x10 ++ // make sure object is 'reasonable' ++ __ beqz(x10, exit); // if obj is NULL it is OK + -+ // Do the call -+ __ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(call_ptr), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); ++ // Check if the oop is in the right area of memory ++ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); ++ __ andr(c_rarg2, x10, c_rarg3); ++ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_bits()); + -+ // Set an oopmap for the call site. This oopmap will map all -+ // oop-registers and debug-info registers as callee-saved. This -+ // will allow deoptimization at this safepoint to find all possible -+ // debug-info recordings, as well as let GC find all oops. ++ // Compare c_rarg2 and c_rarg3. ++ __ bne(c_rarg2, c_rarg3, error); + -+ oop_maps->add_gc_map( __ pc() - start, map); ++ // make sure klass is 'reasonable', which is not zero. ++ __ load_klass(x10, x10); // get klass ++ __ beqz(x10, error); // if klass is NULL it is broken + -+ Label noException; ++ // return if everything seems ok ++ __ bind(exit); + -+ __ reset_last_Java_frame(false); ++ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 ++ __ ret(); + -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ // handle errors ++ __ bind(error); ++ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 + -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, noException); ++ __ pusha(); ++ // debug(char* msg, int64_t pc, int64_t regs[]) ++ __ mv(c_rarg0, t0); // pass address of error message ++ __ mv(c_rarg1, ra); // pass return address ++ __ mv(c_rarg2, sp); // pass address of regs on stack ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ BLOCK_COMMENT("call MacroAssembler::debug"); ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset); ++ __ jalr(x1, t0, offset); ++ __ ebreak(); + -+ // Exception pending ++ return start; ++ } + -+ reg_saver.restore_live_registers(masm); ++ // The inner part of zero_words(). ++ // ++ // Inputs: ++ // x28: the HeapWord-aligned base address of an array to zero. ++ // x29: the count in HeapWords, x29 > 0. ++ // ++ // Returns x28 and x29, adjusted for the caller to clear. ++ // x28: the base address of the tail of words left to clear. ++ // x29: the number of words in the tail. ++ // x29 < MacroAssembler::zero_words_block_size. + -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ address generate_zero_blocks() { ++ Label done; + -+ // No exception case -+ __ bind(noException); ++ const Register base = x28, cnt = x29; + -+ Label no_adjust, bail; -+ if (!cause_return) { -+ // If our stashed return pc was modified by the runtime we avoid touching it -+ __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); -+ __ bne(x18, t0, no_adjust); ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "zero_blocks"); ++ address start = __ pc(); + -+#ifdef ASSERT -+ // Verify the correct encoding of the poll we're about to skip. 
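verify_oop above is only a plausibility filter: a NULL oop is accepted, otherwise the oop masked with Universe::verify_oop_mask() must equal Universe::verify_oop_bits() and the klass loaded from it must be non-NULL. A standalone sketch of the same check (the mask and bits values below are invented for illustration and do not come from the VM):

#include <cstdint>
#include <cstdio>

// Hypothetical stand-ins for Universe::verify_oop_mask() / verify_oop_bits().
const uintptr_t kVerifyOopMask = 0xfffful << 48;  // made-up mask
const uintptr_t kVerifyOopBits = 0;               // made-up expected bits

bool oop_is_plausible(uintptr_t oop, uintptr_t klass) {
  if (oop == 0) return true;                                   // NULL oop is OK
  if ((oop & kVerifyOopMask) != kVerifyOopBits) return false;  // wrong address range
  if (klass == 0) return false;                                // broken: no klass
  return true;
}

int main() {
  std::printf("%d %d\n",
              oop_is_plausible(0, 0),
              oop_is_plausible(0x7f0000001000ul, 0x1000));
  return 0;
}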
-+ // See NativeInstruction::is_lwu_to_zr() -+ __ lwu(t0, Address(x18)); -+ __ andi(t1, t0, 0b0000011); -+ __ mv(t2, 0b0000011); -+ __ bne(t1, t2, bail); // 0-6:0b0000011 -+ __ srli(t1, t0, 7); -+ __ andi(t1, t1, 0b00000); -+ __ bnez(t1, bail); // 7-11:0b00000 -+ __ srli(t1, t0, 12); -+ __ andi(t1, t1, 0b110); -+ __ mv(t2, 0b110); -+ __ bne(t1, t2, bail); // 12-14:0b110 -+#endif -+ // Adjust return pc forward to step over the safepoint poll instruction -+ __ add(x18, x18, NativeInstruction::instruction_size); -+ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); -+ } ++ { ++ // Clear the remaining blocks. ++ Label loop; ++ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); ++ __ bltz(cnt, done); ++ __ bind(loop); ++ for (int i = 0; i < MacroAssembler::zero_words_block_size; i++) { ++ __ sd(zr, Address(base, 0)); ++ __ add(base, base, 8); ++ } ++ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); ++ __ bgez(cnt, loop); ++ __ bind(done); ++ __ add(cnt, cnt, MacroAssembler::zero_words_block_size); ++ } + -+ __ bind(no_adjust); -+ // Normal exit, restore registers and exit. ++ __ ret(); + -+ reg_saver.restore_live_registers(masm); -+ __ ret(); ++ return start; ++ } + -+#ifdef ASSERT -+ __ bind(bail); -+ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); -+#endif ++ typedef enum { ++ copy_forwards = 1, ++ copy_backwards = -1 ++ } copy_direction; + -+ // Make sure all code is generated -+ masm->flush(); ++ // Bulk copy of blocks of 8 words. ++ // ++ // count is a count of words. ++ // ++ // Precondition: count >= 8 ++ // ++ // Postconditions: ++ // ++ // The least significant bit of count contains the remaining count ++ // of words to copy. The rest of count is trash. ++ // ++ // s and d are adjusted to point to the remaining words to copy ++ // ++ void generate_copy_longs(Label &start, Register s, Register d, Register count, ++ copy_direction direction) { ++ int unit = wordSize * direction; ++ int bias = wordSize; + -+ // Fill-out other meta info -+ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); -+} ++ const Register tmp_reg0 = x13, tmp_reg1 = x14, tmp_reg2 = x15, tmp_reg3 = x16, ++ tmp_reg4 = x17, tmp_reg5 = x7, tmp_reg6 = x28, tmp_reg7 = x29; + -+// -+// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss -+// -+// Generate a stub that calls into vm to find out the proper destination -+// of a java call. All the argument registers are live at this point -+// but since this is generic code we don't know what they are and the caller -+// must do any gc of the args. 
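zero_blocks above clears whole blocks of MacroAssembler::zero_words_block_size words and hands anything smaller back to the caller in x28/x29. A standalone sketch of that contract (a block size of 8 words is assumed here purely for illustration):

#include <cstdint>
#include <cstdio>

// Returns the new base; *count is left < block_size for the caller to finish.
uint64_t* zero_blocks(uint64_t* base, ptrdiff_t* count, ptrdiff_t block_size = 8) {
  while (*count >= block_size) {           // mirrors the sub/bgez loop above
    for (ptrdiff_t i = 0; i < block_size; i++) {
      *base++ = 0;                         // sd zr, Address(base, 0); add base, base, 8
    }
    *count -= block_size;
  }
  return base;                             // tail of < block_size words remains
}

int main() {
  uint64_t buf[21];
  for (auto& w : buf) w = 0xdeadbeef;
  ptrdiff_t cnt = 21;
  uint64_t* tail = zero_blocks(buf, &cnt);
  std::printf("words left for caller: %td (tail starts at index %td)\n", cnt, tail - buf);
  return 0;
}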
-+// -+RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { -+ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ const Register stride = x30; + -+ // allocate space for the code -+ ResourceMark rm; ++ assert_different_registers(t0, tmp_reg0, tmp_reg1, tmp_reg2, tmp_reg3, ++ tmp_reg4, tmp_reg5, tmp_reg6, tmp_reg7); ++ assert_different_registers(s, d, count, t0); + -+ CodeBuffer buffer(name, 1000, 512); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++ Label again, drain; ++ const char* stub_name = NULL; ++ if (direction == copy_forwards) { ++ stub_name = "forward_copy_longs"; ++ } else { ++ stub_name = "backward_copy_longs"; ++ } ++ StubCodeMark mark(this, "StubRoutines", stub_name); ++ __ align(CodeEntryAlignment); ++ __ bind(start); + -+ int frame_size_in_words = -1; -+ RegisterSaver reg_saver(false /* save_vectors */); ++ if (direction == copy_forwards) { ++ __ sub(s, s, bias); ++ __ sub(d, d, bias); ++ } + -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); -+ OopMap* map = NULL; ++#ifdef ASSERT ++ // Make sure we are never given < 8 words ++ { ++ Label L; + -+ int start = __ offset(); ++ __ li(t0, 8); ++ __ bge(count, t0, L); ++ __ stop("genrate_copy_longs called with < 8 words"); ++ __ bind(L); ++ } ++#endif + -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ ld(tmp_reg4, Address(s, 5 * unit)); ++ __ ld(tmp_reg5, Address(s, 6 * unit)); ++ __ ld(tmp_reg6, Address(s, 7 * unit)); ++ __ ld(tmp_reg7, Address(s, 8 * unit)); ++ __ addi(s, s, 8 * unit); + -+ int frame_complete = __ offset(); ++ __ sub(count, count, 16); ++ __ bltz(count, drain); + -+ { -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ __ bind(again); + -+ __ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(destination), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); -+ } ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ sd(tmp_reg4, Address(d, 5 * unit)); ++ __ sd(tmp_reg5, Address(d, 6 * unit)); ++ __ sd(tmp_reg6, Address(d, 7 * unit)); ++ __ sd(tmp_reg7, Address(d, 8 * unit)); + -+ // Set an oopmap for the call site. -+ // We need this not only for callee-saved registers, but also for volatile -+ // registers that the compiler might be keeping live across a safepoint. 
++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ ld(tmp_reg4, Address(s, 5 * unit)); ++ __ ld(tmp_reg5, Address(s, 6 * unit)); ++ __ ld(tmp_reg6, Address(s, 7 * unit)); ++ __ ld(tmp_reg7, Address(s, 8 * unit)); + -+ oop_maps->add_gc_map( __ offset() - start, map); ++ __ addi(s, s, 8 * unit); ++ __ addi(d, d, 8 * unit); + -+ // x10 contains the address we are going to jump to assuming no exception got installed ++ __ sub(count, count, 8); ++ __ bgez(count, again); + -+ // clear last_Java_sp -+ __ reset_last_Java_frame(false); -+ // check for pending exceptions -+ Label pending; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ bnez(t0, pending); ++ // Drain ++ __ bind(drain); + -+ // get the returned Method* -+ __ get_vm_result_2(xmethod, xthread); -+ __ sd(xmethod, Address(sp, reg_saver.reg_offset_in_bytes(xmethod))); ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ sd(tmp_reg4, Address(d, 5 * unit)); ++ __ sd(tmp_reg5, Address(d, 6 * unit)); ++ __ sd(tmp_reg6, Address(d, 7 * unit)); ++ __ sd(tmp_reg7, Address(d, 8 * unit)); ++ __ addi(d, d, 8 * unit); + -+ // x10 is where we want to jump, overwrite t0 which is saved and temporary -+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(t0))); -+ reg_saver.restore_live_registers(masm); ++ { ++ Label L1, L2; ++ __ andi(t0, count, 4); ++ __ beqz(t0, L1); + -+ // We are back the the original state on entry and ready to go. ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ addi(s, s, 4 * unit); + -+ __ jr(t0); ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ addi(d, d, 4 * unit); + -+ // Pending exception after the safepoint ++ __ bind(L1); + -+ __ bind(pending); ++ if (direction == copy_forwards) { ++ __ addi(s, s, bias); ++ __ addi(d, d, bias); ++ } + -+ reg_saver.restore_live_registers(masm); ++ __ andi(t0, count, 2); ++ __ beqz(t0, L2); ++ if (direction == copy_backwards) { ++ __ addi(s, s, 2 * unit); ++ __ ld(tmp_reg0, Address(s)); ++ __ ld(tmp_reg1, Address(s, wordSize)); ++ __ addi(d, d, 2 * unit); ++ __ sd(tmp_reg0, Address(d)); ++ __ sd(tmp_reg1, Address(d, wordSize)); ++ } else { ++ __ ld(tmp_reg0, Address(s)); ++ __ ld(tmp_reg1, Address(s, wordSize)); ++ __ addi(s, s, 2 * unit); ++ __ sd(tmp_reg0, Address(d)); ++ __ sd(tmp_reg1, Address(d, wordSize)); ++ __ addi(d, d, 2 * unit); ++ } ++ __ bind(L2); ++ } + -+ // exception pending => remove activation and forward to exception handler ++ __ ret(); ++ } + -+ __ sd(zr, Address(xthread, JavaThread::vm_result_offset())); ++ Label copy_f, copy_b; + -+ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ // All-singing all-dancing memory copy. ++ // ++ // Copy count units of memory from s to d. The size of a unit is ++ // step, which can be positive or negative depending on the direction ++ // of copy. If is_aligned is false, we align the source address. 
++ // ++ /* ++ * if (is_aligned) { ++ * goto copy_8_bytes; ++ * } ++ * bool is_backwards = step < 0; ++ * int granularity = uabs(step); ++ * count = count * granularity; * count bytes ++ * ++ * if (is_backwards) { ++ * s += count; ++ * d += count; ++ * } ++ * ++ * count limit maybe greater than 16, for better performance ++ * if (count < 16) { ++ * goto copy_small; ++ * } ++ * ++ * if ((dst % 8) == (src % 8)) { ++ * aligned; ++ * goto copy8; ++ * } ++ * ++ * copy_small: ++ * load element one by one; ++ * done; ++ */ + -+ // ------------- -+ // make sure all code is generated -+ masm->flush(); ++ typedef void (MacroAssembler::*copy_insn)(Register Rd, const Address &adr, Register temp); + -+ // return the blob -+ return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); -+} ++ void copy_memory_v(Register s, Register d, Register count, Register tmp, int step) { ++ bool is_backward = step < 0; ++ int granularity = uabs(step); + -+#ifdef COMPILER2 -+RuntimeStub* SharedRuntime::make_native_invoker(address call_target, -+ int shadow_space_bytes, -+ const GrowableArray& input_registers, -+ const GrowableArray& output_registers) { -+ Unimplemented(); -+ return nullptr; -+} ++ const Register src = x30, dst = x31, vl = x14, cnt = x15, tmp1 = x16, tmp2 = x17; ++ assert_different_registers(s, d, cnt, vl, tmp, tmp1, tmp2); ++ Assembler::SEW sew = Assembler::elembytes_to_sew(granularity); ++ Label loop_forward, loop_backward, done; + -+//------------------------------generate_exception_blob--------------------------- -+// creates exception blob at the end -+// Using exception blob, this code is jumped from a compiled method. -+// (see emit_exception_handler in riscv.ad file) -+// -+// Given an exception pc at a call we call into the runtime for the -+// handler in this method. This handler might merely restore state -+// (i.e. callee save registers) unwind the frame and jump to the -+// exception handler for the nmethod if there is no Java level handler -+// for the nmethod. -+// -+// This code is entered with a jmp. -+// -+// Arguments: -+// x10: exception oop -+// x13: exception pc -+// -+// Results: -+// x10: exception oop -+// x13: exception pc in caller -+// destination: exception handler of caller -+// -+// Note: the exception pc MUST be at a call (precise debug information) -+// Registers x10, x13, x12, x14, x15, t0 are not callee saved. 
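The pseudocode comment above summarises the scalar copy_memory strategy: copies shorter than 16 bytes, or with source and destination not congruent modulo 8, go element by element, otherwise the loop first steps to an 8-byte boundary and then moves whole 64-bit words. A hedged standalone sketch of that decision logic for the forward, byte-granularity case (an illustration of the idea, not the generated stub):

#include <cstdint>
#include <cstring>
#include <cstdio>

// Forward byte copy using the same "align, then copy words" idea as copy_memory.
void copy_forward(const uint8_t* s, uint8_t* d, size_t count) {
  if (count >= 16 && ((uintptr_t)s % 8) == ((uintptr_t)d % 8)) {
    while ((uintptr_t)s % 8 != 0) {        // same_aligned: step to an 8-byte boundary
      *d++ = *s++;
      count--;
    }
    while (count >= 8) {                   // copy8: move whole 64-bit words
      uint64_t w;
      std::memcpy(&w, s, 8);
      std::memcpy(d, &w, 8);
      s += 8; d += 8; count -= 8;
    }
  }
  while (count > 0) {                      // copy_small: remaining elements one by one
    *d++ = *s++;
    count--;
  }
}

int main() {
  char src[] = "copy_memory scalar path illustration";
  char dst[sizeof(src)] = {0};
  copy_forward((const uint8_t*)src, (uint8_t*)dst, sizeof(src));
  std::printf("%s\n", dst);
  return 0;
}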
-+// ++ __ mv(dst, d); ++ __ mv(src, s); ++ __ mv(cnt, count); + -+void OptoRuntime::generate_exception_blob() { -+ assert(!OptoRuntime::is_callee_saved_register(R13_num), ""); -+ assert(!OptoRuntime::is_callee_saved_register(R10_num), ""); -+ assert(!OptoRuntime::is_callee_saved_register(R12_num), ""); ++ __ bind(loop_forward); ++ __ vsetvli(vl, cnt, sew, Assembler::m8); ++ if (is_backward) { ++ __ bne(vl, cnt, loop_backward); ++ } + -+ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); ++ __ vlex_v(v0, src, sew); ++ __ sub(cnt, cnt, vl); ++ __ slli(vl, vl, (int)sew); ++ __ add(src, src, vl); + -+ // Allocate space for the code -+ ResourceMark rm; -+ // Setup code generation tools -+ CodeBuffer buffer("exception_blob", 2048, 1024); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++ __ vsex_v(v0, dst, sew); ++ __ add(dst, dst, vl); ++ __ bnez(cnt, loop_forward); + -+ // TODO check various assumptions made here -+ // -+ // make sure we do so before running this ++ if (is_backward) { ++ __ j(done); + -+ address start = __ pc(); ++ __ bind(loop_backward); ++ __ sub(tmp, cnt, vl); ++ __ slli(tmp, tmp, sew); ++ __ add(tmp1, s, tmp); ++ __ vlex_v(v0, tmp1, sew); ++ __ add(tmp2, d, tmp); ++ __ vsex_v(v0, tmp2, sew); ++ __ sub(cnt, cnt, vl); ++ __ bnez(cnt, loop_forward); ++ __ bind(done); ++ } ++ } + -+ // push fp and retaddr by hand -+ // Exception pc is 'return address' for stack walker -+ __ addi(sp, sp, -2 * wordSize); -+ __ sd(ra, Address(sp, wordSize)); -+ __ sd(fp, Address(sp)); -+ // there are no callee save registers and we don't expect an -+ // arg reg save area -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif -+ // Store exception in Thread object. We cannot pass any arguments to the -+ // handle_exception call, since we do not want to make any assumption -+ // about the size of the frame where the exception happened in. -+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ void copy_memory(bool is_aligned, Register s, Register d, ++ Register count, Register tmp, int step) { ++ if (UseRVV) { ++ return copy_memory_v(s, d, count, tmp, step); ++ } + -+ // This call does all the hard work. It checks if an exception handler -+ // exists in the method. -+ // If so, it returns the handler address. -+ // If not, it prepares for stack-unwinding, restoring the callee-save -+ // registers of the frame being removed. -+ // -+ // address OptoRuntime::handle_exception_C(JavaThread* thread) -+ // -+ // n.b. 1 gp arg, 0 fp args, integral return type ++ bool is_backwards = step < 0; ++ int granularity = uabs(step); + -+ // the stack should always be aligned -+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, noreg, the_pc, t0); -+ __ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); -+ __ jalr(x1, t0, offset); ++ const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17; + ++ Label same_aligned; ++ Label copy8, copy_small, done; + -+ // handle_exception_C is a special VM call which does not require an explicit -+ // instruction sync afterwards. 
++ copy_insn ld_arr = NULL, st_arr = NULL; ++ switch (granularity) { ++ case 1 : ++ ld_arr = (copy_insn)&MacroAssembler::lbu; ++ st_arr = (copy_insn)&MacroAssembler::sb; ++ break; ++ case 2 : ++ ld_arr = (copy_insn)&MacroAssembler::lhu; ++ st_arr = (copy_insn)&MacroAssembler::sh; ++ break; ++ case 4 : ++ ld_arr = (copy_insn)&MacroAssembler::lwu; ++ st_arr = (copy_insn)&MacroAssembler::sw; ++ break; ++ case 8 : ++ ld_arr = (copy_insn)&MacroAssembler::ld; ++ st_arr = (copy_insn)&MacroAssembler::sd; ++ break; ++ default : ++ ShouldNotReachHere(); ++ } + -+ // Set an oopmap for the call site. This oopmap will only be used if we -+ // are unwinding the stack. Hence, all locations will be dead. -+ // Callee-saved registers will be the same as the frame above (i.e., -+ // handle_exception_stub), since they were restored when we got the -+ // exception. ++ __ beqz(count, done); ++ __ slli(cnt, count, exact_log2(granularity)); ++ if (is_backwards) { ++ __ add(src, s, cnt); ++ __ add(dst, d, cnt); ++ } else { ++ __ mv(src, s); ++ __ mv(dst, d); ++ } + -+ OopMapSet* oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); ++ if (is_aligned) { ++ __ addi(tmp, cnt, -8); ++ __ bgez(tmp, copy8); ++ __ j(copy_small); ++ } + -+ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); ++ __ mv(tmp, 16); ++ __ blt(cnt, tmp, copy_small); + -+ __ reset_last_Java_frame(false); ++ __ xorr(tmp, src, dst); ++ __ andi(tmp, tmp, 0b111); ++ __ bnez(tmp, copy_small); + -+ // Restore callee-saved registers ++ __ bind(same_aligned); ++ __ andi(tmp, src, 0b111); ++ __ beqz(tmp, copy8); ++ if (is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ (_masm->*ld_arr)(tmp3, Address(src), t0); ++ (_masm->*st_arr)(tmp3, Address(dst), t0); ++ if (!is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ __ addi(cnt, cnt, -granularity); ++ __ beqz(cnt, done); ++ __ j(same_aligned); + -+ // fp is an implicitly saved callee saved register (i.e. the calling -+ // convention will save restore it in prolog/epilog) Other than that -+ // there are no callee save registers now that adapter frames are gone. -+ // and we dont' expect an arg reg save area -+ __ ld(fp, Address(sp)); -+ __ ld(x13, Address(sp, wordSize)); -+ __ addi(sp, sp , 2 * wordSize); ++ __ bind(copy8); ++ if (is_backwards) { ++ __ addi(src, src, -wordSize); ++ __ addi(dst, dst, -wordSize); ++ } ++ __ ld(tmp3, Address(src)); ++ __ sd(tmp3, Address(dst)); ++ if (!is_backwards) { ++ __ addi(src, src, wordSize); ++ __ addi(dst, dst, wordSize); ++ } ++ __ addi(cnt, cnt, -wordSize); ++ __ addi(tmp4, cnt, -8); ++ __ bgez(tmp4, copy8); // cnt >= 8, do next loop + -+ // x10: exception handler ++ __ beqz(cnt, done); + -+ // We have a handler in x10 (could be deopt blob). 
-+ __ mv(t0, x10); ++ __ bind(copy_small); ++ if (is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ (_masm->*ld_arr)(tmp3, Address(src), t0); ++ (_masm->*st_arr)(tmp3, Address(dst), t0); ++ if (!is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ __ addi(cnt, cnt, -granularity); ++ __ bgtz(cnt, copy_small); + -+ // Get the exception oop -+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ // Get the exception pc in case we are deoptimized -+ __ ld(x14, Address(xthread, JavaThread::exception_pc_offset())); -+#ifdef ASSERT -+ __ sd(zr, Address(xthread, JavaThread::exception_handler_pc_offset())); -+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); -+#endif -+ // Clear the exception oop so GC no longer processes it as a root. -+ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); ++ __ bind(done); ++ } + -+ // x10: exception oop -+ // t0: exception handler -+ // x14: exception pc -+ // Jump to handler -+ -+ __ jr(t0); -+ -+ // Make sure all code is generated -+ masm->flush(); -+ -+ // Set exception blob -+ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); -+} -+#endif // COMPILER2 -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -new file mode 100644 -index 00000000000..b3fdd04db1b ---- /dev/null -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -0,0 +1,3864 @@ -+/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "compiler/oopMap.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "interpreter/interpreter.hpp" -+#include "memory/universe.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/instanceOop.hpp" -+#include "oops/method.hpp" -+#include "oops/objArrayKlass.hpp" -+#include "oops/oop.inline.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/handles.inline.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubCodeGenerator.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.inline.hpp" -+#include "utilities/align.hpp" -+#include "utilities/powerOfTwo.hpp" -+#ifdef COMPILER2 -+#include "opto/runtime.hpp" -+#endif -+#if INCLUDE_ZGC -+#include "gc/z/zThreadLocalData.hpp" -+#endif -+ -+// Declaration and definition of StubGenerator (no .hpp file). -+// For a more detailed description of the stub routine structure -+// see the comment in stubRoutines.hpp -+ -+#undef __ -+#define __ _masm-> -+ -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#else -+#define BLOCK_COMMENT(str) __ block_comment(str) -+#endif -+ -+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") -+ -+// Stub Code definitions -+ -+class StubGenerator: public StubCodeGenerator { -+ private: ++ // Scan over array at a for count oops, verifying each one. ++ // Preserves a and count, clobbers t0 and t1. ++ void verify_oop_array(size_t size, Register a, Register count, Register temp) { ++ Label loop, end; ++ __ mv(t1, zr); ++ __ slli(t0, count, exact_log2(size)); ++ __ bind(loop); ++ __ bgeu(t1, t0, end); + -+#ifdef PRODUCT -+#define inc_counter_np(counter) ((void)0) -+#else -+ void inc_counter_np_(int& counter) { -+ __ la(t1, ExternalAddress((address)&counter)); -+ __ lwu(t0, Address(t1, 0)); -+ __ addiw(t0, t0, 1); -+ __ sw(t0, Address(t1, 0)); ++ __ add(temp, a, t1); ++ if (size == (size_t)wordSize) { ++ __ ld(temp, Address(temp, 0)); ++ __ verify_oop(temp); ++ } else { ++ __ lwu(temp, Address(temp, 0)); ++ __ decode_heap_oop(temp); // calls verify_oop ++ } ++ __ add(t1, t1, size); ++ __ j(loop); ++ __ bind(end); + } -+#define inc_counter_np(counter) \ -+ BLOCK_COMMENT("inc_counter " #counter); \ -+ inc_counter_np_(counter); -+#endif + -+ // Call stubs are used to call Java from C -+ // + // Arguments: -+ // c_rarg0: call wrapper address address -+ // c_rarg1: result address -+ // c_rarg2: result type BasicType -+ // c_rarg3: method Method* -+ // c_rarg4: (interpreter) entry point address -+ // c_rarg5: parameters intptr_t* -+ // c_rarg6: parameter size (in words) int -+ // c_rarg7: thread Thread* -+ // -+ // There is no return from the stub itself as any Java result -+ // is written to result -+ // -+ // we save x1 (ra) as the return PC at the base of the frame and -+ // link x8 (fp) below it as the frame pointer installing sp (x2) -+ // into fp. -+ // -+ // we save x10-x17, which accounts for all the c arguments. -+ // -+ // TODO: strictly do we need to save them all? they are treated as -+ // volatile by C so could we omit saving the ones we are going to -+ // place in global registers (thread? method?) or those we only use -+ // during setup of the Java call? -+ // -+ // we don't need to save x5 which C uses as an indirect result location -+ // return register. 
++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string + // -+ // we don't need to save x6-x7 and x28-x31 which both C and Java treat as -+ // volatile ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero + // -+ // we save x18-x27 which Java uses as temporary registers and C -+ // expects to be callee-save ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. + // -+ // so the stub frame looks like this when we enter Java code ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). + // -+ // [ return_from_Java ] <--- sp -+ // [ argument word n ] -+ // ... -+ // -22 [ argument word 1 ] -+ // -21 [ saved x27 ] <--- sp_after_call -+ // -20 [ saved x26 ] -+ // -19 [ saved x25 ] -+ // -18 [ saved x24 ] -+ // -17 [ saved x23 ] -+ // -16 [ saved x22 ] -+ // -15 [ saved x21 ] -+ // -14 [ saved x20 ] -+ // -13 [ saved x19 ] -+ // -12 [ saved x18 ] -+ // -11 [ saved x9 ] -+ // -10 [ call wrapper (x10) ] -+ // -9 [ result (x11) ] -+ // -8 [ result type (x12) ] -+ // -7 [ method (x13) ] -+ // -6 [ entry point (x14) ] -+ // -5 [ parameters (x15) ] -+ // -4 [ parameter size (x16) ] -+ // -3 [ thread (x17) ] -+ // -2 [ saved fp (x8) ] -+ // -1 [ saved ra (x1) ] -+ // 0 [ ] <--- fp == saved sp (x2) -+ -+ // Call stub stack layout word offsets from fp -+ enum call_stub_layout { -+ sp_after_call_off = -21, -+ -+ x27_off = -21, -+ x26_off = -20, -+ x25_off = -19, -+ x24_off = -18, -+ x23_off = -17, -+ x22_off = -16, -+ x21_off = -15, -+ x20_off = -14, -+ x19_off = -13, -+ x18_off = -12, -+ x9_off = -11, -+ -+ call_wrapper_off = -10, -+ result_off = -9, -+ result_type_off = -8, -+ method_off = -7, -+ entry_point_off = -6, -+ parameters_off = -5, -+ parameter_size_off = -4, -+ thread_off = -3, -+ fp_f = -2, -+ retaddr_off = -1, -+ }; -+ -+ address generate_call_stub(address& return_address) { -+ assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 && -+ (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, -+ "adjust this code"); -+ -+ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; ++ RegSet saved_reg = RegSet::of(s, d, count); ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); -+ -+ const Address sp_after_call (fp, sp_after_call_off * wordSize); -+ -+ const Address call_wrapper (fp, call_wrapper_off * wordSize); -+ const Address result (fp, result_off * wordSize); -+ const Address result_type (fp, result_type_off * wordSize); -+ const Address method (fp, method_off * wordSize); -+ const Address entry_point (fp, entry_point_off * wordSize); -+ const Address parameters (fp, parameters_off * wordSize); -+ const Address parameter_size(fp, parameter_size_off * wordSize); -+ -+ const Address thread (fp, thread_off * wordSize); -+ -+ const Address x27_save (fp, x27_off * wordSize); -+ const Address x26_save (fp, x26_off * wordSize); -+ const 
Address x25_save (fp, x25_off * wordSize); -+ const Address x24_save (fp, x24_off * wordSize); -+ const Address x23_save (fp, x23_off * wordSize); -+ const Address x22_save (fp, x22_off * wordSize); -+ const Address x21_save (fp, x21_off * wordSize); -+ const Address x20_save (fp, x20_off * wordSize); -+ const Address x19_save (fp, x19_off * wordSize); -+ const Address x18_save (fp, x18_off * wordSize); -+ -+ const Address x9_save (fp, x9_off * wordSize); -+ -+ // stub code -+ -+ address riscv_entry = __ pc(); -+ -+ // set up frame and move sp to end of save area + __ enter(); -+ __ addi(sp, fp, sp_after_call_off * wordSize); -+ -+ // save register parameters and Java temporary/global registers -+ // n.b. we save thread even though it gets installed in -+ // xthread because we want to sanity check tp later -+ __ sd(c_rarg7, thread); -+ __ sw(c_rarg6, parameter_size); -+ __ sd(c_rarg5, parameters); -+ __ sd(c_rarg4, entry_point); -+ __ sd(c_rarg3, method); -+ __ sd(c_rarg2, result_type); -+ __ sd(c_rarg1, result); -+ __ sd(c_rarg0, call_wrapper); -+ -+ __ sd(x9, x9_save); -+ -+ __ sd(x18, x18_save); -+ __ sd(x19, x19_save); -+ __ sd(x20, x20_save); -+ __ sd(x21, x21_save); -+ __ sd(x22, x22_save); -+ __ sd(x23, x23_save); -+ __ sd(x24, x24_save); -+ __ sd(x25, x25_save); -+ __ sd(x26, x26_save); -+ __ sd(x27, x27_save); -+ -+ // install Java thread in global register now we have saved -+ // whatever value it held -+ __ mv(xthread, c_rarg7); -+ -+ // And method -+ __ mv(xmethod, c_rarg3); -+ -+ // set up the heapbase register -+ __ reinit_heapbase(); + -+#ifdef ASSERT -+ // make sure we have no pending exceptions -+ { -+ Label L; -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ beqz(t0, L); -+ __ stop("StubRoutines::call_stub: entered with pending exception"); -+ __ BIND(L); ++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ BLOCK_COMMENT("Entry:"); + } -+#endif -+ // pass parameters if any -+ __ mv(esp, sp); -+ __ slli(t0, c_rarg6, LogBytesPerWord); -+ __ sub(t0, sp, t0); // Move SP out of the way -+ __ andi(sp, t0, -2 * wordSize); -+ -+ BLOCK_COMMENT("pass parameters if any"); -+ Label parameters_done; -+ // parameter count is still in c_rarg6 -+ // and parameter pointer identifying param 1 is in c_rarg5 -+ __ beqz(c_rarg6, parameters_done); + -+ address loop = __ pc(); -+ __ ld(t0, c_rarg5, 0); -+ __ addi(c_rarg5, c_rarg5, wordSize); -+ __ addi(c_rarg6, c_rarg6, -1); -+ __ push_reg(t0); -+ __ bgtz(c_rarg6, loop); -+ -+ __ BIND(parameters_done); -+ -+ // call Java entry -- passing methdoOop, and current sp -+ // xmethod: Method* -+ // x30: sender sp -+ BLOCK_COMMENT("call Java function"); -+ __ mv(x30, sp); -+ __ jalr(c_rarg4); -+ -+ // save current address for use by exception handling code -+ -+ return_address = __ pc(); -+ -+ // store result depending on type (everything that is not -+ // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) -+ // n.b. 
this assumes Java returns an integral result in x10 -+ // and a floating result in j_farg0 -+ __ ld(j_rarg2, result); -+ Label is_long, is_float, is_double, exit; -+ __ ld(j_rarg1, result_type); -+ __ li(t0, (u1)T_OBJECT); -+ __ beq(j_rarg1, t0, is_long); -+ __ li(t0, (u1)T_LONG); -+ __ beq(j_rarg1, t0, is_long); -+ __ li(t0, (u1)T_FLOAT); -+ __ beq(j_rarg1, t0, is_float); -+ __ li(t0, (u1)T_DOUBLE); -+ __ beq(j_rarg1, t0, is_double); -+ -+ // handle T_INT case -+ __ sw(x10, Address(j_rarg2)); -+ -+ __ BIND(exit); ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } + -+ // pop parameters -+ __ addi(esp, fp, sp_after_call_off * wordSize); ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_reg); + -+#ifdef ASSERT -+ // verify that threads correspond -+ { -+ Label L, S; -+ __ ld(t0, thread); -+ __ bne(xthread, t0, S); -+ __ get_thread(t0); -+ __ beq(xthread, t0, L); -+ __ BIND(S); -+ __ stop("StubRoutines::call_stub: threads must correspond"); -+ __ BIND(L); ++ if (is_oop) { ++ // save regs before copy_memory ++ __ push_reg(RegSet::of(d, count), sp); + } -+#endif + -+ // restore callee-save registers -+ __ ld(x27, x27_save); -+ __ ld(x26, x26_save); -+ __ ld(x25, x25_save); -+ __ ld(x24, x24_save); -+ __ ld(x23, x23_save); -+ __ ld(x22, x22_save); -+ __ ld(x21, x21_save); -+ __ ld(x20, x20_save); -+ __ ld(x19, x19_save); -+ __ ld(x18, x18_save); ++ copy_memory(aligned, s, d, count, t0, size); + -+ __ ld(x9, x9_save); ++ if (is_oop) { ++ __ pop_reg(RegSet::of(d, count), sp); ++ if (VerifyOops) { ++ verify_oop_array(size, d, count, t2); ++ } ++ } + -+ __ ld(c_rarg0, call_wrapper); -+ __ ld(c_rarg1, result); -+ __ ld(c_rarg2, result_type); -+ __ ld(c_rarg3, method); -+ __ ld(c_rarg4, entry_point); -+ __ ld(c_rarg5, parameters); -+ __ ld(c_rarg6, parameter_size); -+ __ ld(c_rarg7, thread); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); + -+ // leave frame and return to caller + __ leave(); ++ __ mv(x10, zr); // return 0 + __ ret(); -+ -+ // handle return types different from T_INT -+ -+ __ BIND(is_long); -+ __ sd(x10, Address(j_rarg2, 0)); -+ __ j(exit); -+ -+ __ BIND(is_float); -+ __ fsw(j_farg0, Address(j_rarg2, 0), t0); -+ __ j(exit); -+ -+ __ BIND(is_double); -+ __ fsd(j_farg0, Address(j_rarg2, 0), t0); -+ __ j(exit); -+ + return start; + } + -+ // Return point for a Java call if there's an exception thrown in -+ // Java code. The exception is caught and transformed into a -+ // pending exception stored in JavaThread that can be tested from -+ // within the VM. ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string + // -+ // Note: Usually the parameters are removed by the callee. In case -+ // of an exception crossing an activation frame boundary, that is -+ // not the case if the callee is compiled code => need to setup the -+ // sp. 
++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero + // -+ // x10: exception oop -+ -+ address generate_catch_exception() { -+ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target, ++ address* entry, const char* name, ++ bool dest_uninitialized = false) { ++ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; ++ RegSet saved_regs = RegSet::of(s, d, count); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); ++ __ enter(); + -+ // same as in generate_call_stub(): -+ const Address thread(fp, thread_off * wordSize); -+ -+#ifdef ASSERT -+ // verify that threads correspond -+ { -+ Label L, S; -+ __ ld(t0, thread); -+ __ bne(xthread, t0, S); -+ __ get_thread(t0); -+ __ beq(xthread, t0, L); -+ __ bind(S); -+ __ stop("StubRoutines::catch_exception: threads must correspond"); -+ __ bind(L); ++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ BLOCK_COMMENT("Entry:"); + } -+#endif + -+ // set pending exception -+ __ verify_oop(x10); ++ // use fwd copy when (d-s) above_equal (count*size) ++ __ sub(t0, d, s); ++ __ slli(t1, count, exact_log2(size)); ++ __ bgeu(t0, t1, nooverlap_target); + -+ __ sd(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ mv(t0, (address)__FILE__); -+ __ sd(t0, Address(xthread, Thread::exception_file_offset())); -+ __ mv(t0, (int)__LINE__); -+ __ sw(t0, Address(xthread, Thread::exception_line_offset())); ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } + -+ // complete return to VM -+ assert(StubRoutines::_call_stub_return_address != NULL, -+ "_call_stub_return_address must have been generated before"); -+ __ j(StubRoutines::_call_stub_return_address); ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_regs); + -+ return start; -+ } ++ if (is_oop) { ++ // save regs before copy_memory ++ __ push_reg(RegSet::of(d, count), sp); ++ } + -+ // Continuation point for runtime calls returning with a pending -+ // exception. The pending exception check happened in the runtime -+ // or native call stub. The pending exception in Thread is -+ // converted into a Java-level exception. -+ // -+ // Contract with Java-level exception handlers: -+ // x10: exception -+ // x13: throwing pc -+ // -+ // NOTE: At entry of this stub, exception-pc must be in RA !! -+ -+ // NOTE: this is always used as a jump target within generated code -+ // so it just needs to be generated code with no x86 prolog -+ -+ address generate_forward_exception() { -+ StubCodeMark mark(this, "StubRoutines", "forward exception"); -+ address start = __ pc(); -+ -+ // Upon entry, RA points to the return address returning into -+ // Java (interpreted or compiled) code; i.e., the return address -+ // becomes the throwing pc. 
-+ // -+ // Arguments pushed before the runtime call are still on the stack -+ // but the exception handler will reset the stack pointer -> -+ // ignore them. A potential result in registers can be ignored as -+ // well. -+ -+#ifdef ASSERT -+ // make sure this code is only executed if there is a pending exception -+ { -+ Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ bnez(t0, L); -+ __ stop("StubRoutines::forward exception: no pending exception (1)"); -+ __ bind(L); -+ } -+#endif -+ -+ // compute exception handler into x9 -+ -+ // call the VM to find the handler address associated with the -+ // caller address. pass thread in x10 and caller pc (ret address) -+ // in x11. n.b. the caller pc is in ra, unlike x86 where it is on -+ // the stack. -+ __ mv(c_rarg1, ra); -+ // ra will be trashed by the VM call so we move it to x9 -+ // (callee-saved) because we also need to pass it to the handler -+ // returned by this call. -+ __ mv(x9, ra); -+ BLOCK_COMMENT("call exception_handler_for_return_address"); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, -+ SharedRuntime::exception_handler_for_return_address), -+ xthread, c_rarg1); -+ // we should not really care that ra is no longer the callee -+ // address. we saved the value the handler needs in x9 so we can -+ // just copy it to x13. however, the C2 handler will push its own -+ // frame and then calls into the VM and the VM code asserts that -+ // the PC for the frame above the handler belongs to a compiled -+ // Java method. So, we restore ra here to satisfy that assert. -+ __ mv(ra, x9); -+ // setup x10 & x13 & clear pending exception -+ __ mv(x13, x9); -+ __ mv(x9, x10); -+ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); ++ copy_memory(aligned, s, d, count, t0, -size); + -+#ifdef ASSERT -+ // make sure exception is set -+ { -+ Label L; -+ __ bnez(x10, L); -+ __ stop("StubRoutines::forward exception: no pending exception (2)"); -+ __ bind(L); ++ if (is_oop) { ++ __ pop_reg(RegSet::of(d, count), sp); ++ if (VerifyOops) { ++ verify_oop_array(size, d, count, t2); ++ } + } -+#endif -+ -+ // continue at exception handler -+ // x10: exception -+ // x13: throwing pc -+ // x9: exception handler -+ __ verify_oop(x10); -+ __ jr(x9); -+ ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); ++ __ leave(); ++ __ mv(x10, zr); // return 0 ++ __ ret(); + return start; + } + -+ // Non-destructive plausibility checks for oops -+ // + // Arguments: -+ // x10: oop to verify -+ // t0: error message ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string + // -+ // Stack after saving c_rarg3: -+ // [tos + 0]: saved c_rarg3 -+ // [tos + 1]: saved c_rarg2 -+ // [tos + 2]: saved ra -+ // [tos + 3]: saved t1 -+ // [tos + 4]: saved x10 -+ // [tos + 5]: saved t0 -+ address generate_verify_oop() { -+ -+ StubCodeMark mark(this, "StubRoutines", "verify_oop"); -+ address start = __ pc(); -+ -+ Label exit, error; -+ -+ __ push_reg(0x3000, sp); // save c_rarg2 and c_rarg3 -+ -+ __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr())); -+ __ ld(c_rarg3, Address(c_rarg2)); -+ __ add(c_rarg3, c_rarg3, 1); -+ __ sd(c_rarg3, Address(c_rarg2)); -+ -+ // object is in x10 -+ // make sure object is 'reasonable' -+ __ beqz(x10, exit); // if obj is NULL it is OK -+ -+#if INCLUDE_ZGC -+ if (UseZGC) { -+ // Check if mask is good. 
-+ // verifies that ZAddressBadMask & x10 == 0 -+ __ ld(c_rarg3, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(c_rarg2, x10, c_rarg3); -+ __ bnez(c_rarg2, error); -+ } -+#endif ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jbyte), aligned, not_oop, entry, name); ++ } + -+ // Check if the oop is in the right area of memory -+ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); -+ __ andr(c_rarg2, x10, c_rarg3); -+ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_bits()); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_byte_copy(bool aligned, address nooverlap_target, ++ address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jbyte), aligned, not_oop, nooverlap_target, entry, name); ++ } + -+ // Compare c_rarg2 and c_rarg3. -+ __ bne(c_rarg2, c_rarg3, error); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_short_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_short_copy(). ++ // ++ address generate_disjoint_short_copy(bool aligned, ++ address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jshort), aligned, not_oop, entry, name); ++ } + -+ // make sure klass is 'reasonable', which is not zero. 
-+ __ load_klass(x10, x10); // get klass -+ __ beqz(x10, error); // if klass is NULL it is broken ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, address nooverlap_target, ++ address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jshort), aligned, not_oop, nooverlap_target, entry, name); ++ } + -+ // return if everything seems ok -+ __ bind(exit); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_int_copy(bool aligned, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name); ++ } + -+ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 -+ __ ret(); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. 
++ // ++ address generate_conjoint_int_copy(bool aligned, address nooverlap_target, ++ address* entry, const char* name, ++ bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jint), aligned, not_oop, nooverlap_target, entry, name); ++ } + -+ // handle errors -+ __ bind(error); -+ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 + -+ __ pusha(); -+ // debug(char* msg, int64_t pc, int64_t regs[]) -+ __ mv(c_rarg0, t0); // pass address of error message -+ __ mv(c_rarg1, ra); // pass return address -+ __ mv(c_rarg2, sp); // pass address of regs on stack -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif -+ BLOCK_COMMENT("call MacroAssembler::debug"); -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset); -+ __ jalr(x1, t0, offset); -+ __ ebreak(); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ // Side Effects: ++ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the ++ // no-overlap entry point used by generate_conjoint_long_oop_copy(). ++ // ++ address generate_disjoint_long_copy(bool aligned, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jlong), aligned, not_oop, entry, name); ++ } + -+ return start; ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ address generate_conjoint_long_copy(bool aligned, ++ address nooverlap_target, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jlong), aligned, not_oop, nooverlap_target, entry, name); + } + -+ // The inner part of zero_words(). ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string + // + // Inputs: -+ // x28: the HeapWord-aligned base address of an array to zero. -+ // x29: the count in HeapWords, x29 > 0. ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero + // -+ // Returns x28 and x29, adjusted for the caller to clear. -+ // x28: the base address of the tail of words left to clear. -+ // x29: the number of words in the tail. -+ // x29 < MacroAssembler::zero_words_block_size. ++ // Side Effects: ++ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the ++ // no-overlap entry point used by generate_conjoint_long_oop_copy(). ++ // ++ address generate_disjoint_oop_copy(bool aligned, address* entry, ++ const char* name, bool dest_uninitialized) { ++ const bool is_oop = true; ++ const size_t size = UseCompressedOops ? 
sizeof (jint) : sizeof (jlong); ++ return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized); ++ } + -+ address generate_zero_blocks() { -+ Label done; ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ address generate_conjoint_oop_copy(bool aligned, ++ address nooverlap_target, address* entry, ++ const char* name, bool dest_uninitialized) { ++ const bool is_oop = true; ++ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); ++ return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, ++ name, dest_uninitialized); ++ } + -+ const Register base = x28, cnt = x29; ++ // Helper for generating a dynamic type check. ++ // Smashes t0, t1. ++ void generate_type_check(Register sub_klass, ++ Register super_check_offset, ++ Register super_klass, ++ Label& L_success) { ++ assert_different_registers(sub_klass, super_check_offset, super_klass); + -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "zero_blocks"); -+ address start = __ pc(); ++ BLOCK_COMMENT("type_check:"); + -+ { -+ // Clear the remaining blocks. -+ Label loop; -+ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); -+ __ bltz(cnt, done); -+ __ bind(loop); -+ for (int i = 0; i < MacroAssembler::zero_words_block_size; i++) { -+ __ sd(zr, Address(base, 0)); -+ __ add(base, base, 8); -+ } -+ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); -+ __ bgez(cnt, loop); -+ __ bind(done); -+ __ add(cnt, cnt, MacroAssembler::zero_words_block_size); -+ } ++ Label L_miss; + -+ __ ret(); ++ __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, super_check_offset); ++ __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL); + -+ return start; ++ // Fall through on failure! ++ __ BIND(L_miss); + } + -+ typedef enum { -+ copy_forwards = 1, -+ copy_backwards = -1 -+ } copy_direction; -+ -+ // Bulk copy of blocks of 8 words. -+ // -+ // count is a count of words. + // -+ // Precondition: count >= 8 -+ // -+ // Postconditions: ++ // Generate checkcasting array copy stub + // -+ // The least significant bit of count contains the remaining count -+ // of words to copy. The rest of count is trash. 
++ // Input: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // c_rarg3 - size_t ckoff (super_check_offset) ++ // c_rarg4 - oop ckval (super_klass) + // -+ // s and d are adjusted to point to the remaining words to copy ++ // Output: ++ // x10 == 0 - success ++ // x10 == -1^K - failure, where K is partial transfer count + // -+ void generate_copy_longs(Label &start, Register s, Register d, Register count, -+ copy_direction direction) { -+ int unit = wordSize * direction; -+ int bias = wordSize; ++ address generate_checkcast_copy(const char* name, address* entry, ++ bool dest_uninitialized = false) { ++ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; + -+ const Register tmp_reg0 = x13, tmp_reg1 = x14, tmp_reg2 = x15, tmp_reg3 = x16, -+ tmp_reg4 = x17, tmp_reg5 = x7, tmp_reg6 = x28, tmp_reg7 = x29; ++ // Input registers (after setup_arg_regs) ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register count = c_rarg2; // elementscount ++ const Register ckoff = c_rarg3; // super_check_offset ++ const Register ckval = c_rarg4; // super_klass + -+ const Register stride = x30; ++ RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4); ++ RegSet wb_post_saved_regs = RegSet::of(count); + -+ assert_different_registers(t0, tmp_reg0, tmp_reg1, tmp_reg2, tmp_reg3, -+ tmp_reg4, tmp_reg5, tmp_reg6, tmp_reg7); -+ assert_different_registers(s, d, count, t0); ++ // Registers used as temps (x7, x9, x18 are save-on-entry) ++ const Register count_save = x19; // orig elementscount ++ const Register start_to = x18; // destination array start address ++ const Register copied_oop = x7; // actual oop copied ++ const Register r9_klass = x9; // oop._klass ++ ++ //--------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the two arrays are subtypes of Object[] but the ++ // destination array type is not equal to or a supertype ++ // of the source type. Each element must be separately ++ // checked. ++ ++ assert_different_registers(from, to, count, ckoff, ckval, start_to, ++ copied_oop, r9_klass, count_save); + -+ Label again, drain; -+ const char* stub_name = NULL; -+ if (direction == copy_forwards) { -+ stub_name = "forward_copy_longs"; -+ } else { -+ stub_name = "backward_copy_longs"; -+ } -+ StubCodeMark mark(this, "StubRoutines", stub_name); + __ align(CodeEntryAlignment); -+ __ bind(start); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); + -+ if (direction == copy_forwards) { -+ __ sub(s, s, bias); -+ __ sub(d, d, bias); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ // Caller of this entry point must set up the argument registers. ++ if (entry != NULL) { ++ *entry = __ pc(); ++ BLOCK_COMMENT("Entry:"); + } + -+#ifdef ASSERT -+ // Make sure we are never given < 8 words -+ { -+ Label L; ++ // Empty array: Nothing to do ++ __ beqz(count, L_done); + -+ __ li(t0, 8); -+ __ bge(count, t0, L); -+ __ stop("genrate_copy_longs called with < 8 words"); ++ __ push_reg(RegSet::of(x7, x9, x18, x19), sp); ++ ++#ifdef ASSERT ++ BLOCK_COMMENT("assert consistent ckoff/ckval"); ++ // The ckoff and ckval must be mutually consistent, ++ // even though caller generates both. 
++ { Label L; ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ lwu(start_to, Address(ckval, sco_offset)); ++ __ beq(ckoff, start_to, L); ++ __ stop("super_check_offset inconsistent"); + __ bind(L); + } -+#endif -+ -+ __ ld(tmp_reg0, Address(s, 1 * unit)); -+ __ ld(tmp_reg1, Address(s, 2 * unit)); -+ __ ld(tmp_reg2, Address(s, 3 * unit)); -+ __ ld(tmp_reg3, Address(s, 4 * unit)); -+ __ ld(tmp_reg4, Address(s, 5 * unit)); -+ __ ld(tmp_reg5, Address(s, 6 * unit)); -+ __ ld(tmp_reg6, Address(s, 7 * unit)); -+ __ ld(tmp_reg7, Address(s, 8 * unit)); -+ __ addi(s, s, 8 * unit); ++#endif //ASSERT + -+ __ sub(count, count, 16); -+ __ bltz(count, drain); -+ -+ __ bind(again); ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; ++ bool is_oop = true; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } + -+ __ sd(tmp_reg0, Address(d, 1 * unit)); -+ __ sd(tmp_reg1, Address(d, 2 * unit)); -+ __ sd(tmp_reg2, Address(d, 3 * unit)); -+ __ sd(tmp_reg3, Address(d, 4 * unit)); -+ __ sd(tmp_reg4, Address(d, 5 * unit)); -+ __ sd(tmp_reg5, Address(d, 6 * unit)); -+ __ sd(tmp_reg6, Address(d, 7 * unit)); -+ __ sd(tmp_reg7, Address(d, 8 * unit)); ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs); + -+ __ ld(tmp_reg0, Address(s, 1 * unit)); -+ __ ld(tmp_reg1, Address(s, 2 * unit)); -+ __ ld(tmp_reg2, Address(s, 3 * unit)); -+ __ ld(tmp_reg3, Address(s, 4 * unit)); -+ __ ld(tmp_reg4, Address(s, 5 * unit)); -+ __ ld(tmp_reg5, Address(s, 6 * unit)); -+ __ ld(tmp_reg6, Address(s, 7 * unit)); -+ __ ld(tmp_reg7, Address(s, 8 * unit)); ++ // save the original count ++ __ mv(count_save, count); + -+ __ addi(s, s, 8 * unit); -+ __ addi(d, d, 8 * unit); ++ // Copy from low to high addresses ++ __ mv(start_to, to); // Save destination array start address ++ __ j(L_load_element); + -+ __ sub(count, count, 8); -+ __ bgez(count, again); ++ // ======== begin loop ======== ++ // (Loop is rotated; its entry is L_load_element.) ++ // Loop control: ++ // for count to 0 do ++ // copied_oop = load_heap_oop(from++) ++ // ... generate_type_check ... ++ // store_heap_oop(to++, copied_oop) ++ // end + -+ // Drain -+ __ bind(drain); ++ __ align(OptoLoopAlignment); + -+ __ sd(tmp_reg0, Address(d, 1 * unit)); -+ __ sd(tmp_reg1, Address(d, 2 * unit)); -+ __ sd(tmp_reg2, Address(d, 3 * unit)); -+ __ sd(tmp_reg3, Address(d, 4 * unit)); -+ __ sd(tmp_reg4, Address(d, 5 * unit)); -+ __ sd(tmp_reg5, Address(d, 6 * unit)); -+ __ sd(tmp_reg6, Address(d, 7 * unit)); -+ __ sd(tmp_reg7, Address(d, 8 * unit)); -+ __ addi(d, d, 8 * unit); ++ __ BIND(L_store_element); ++ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, AS_RAW); // store the oop ++ __ add(to, to, UseCompressedOops ? 4 : 8); ++ __ sub(count, count, 1); ++ __ beqz(count, L_do_card_marks); + -+ { -+ Label L1, L2; -+ __ andi(t0, count, 4); -+ __ beqz(t0, L1); ++ // ======== loop entry is here ======== ++ __ BIND(L_load_element); ++ __ load_heap_oop(copied_oop, Address(from, 0), noreg, noreg, AS_RAW); // load the oop ++ __ add(from, from, UseCompressedOops ? 
4 : 8); ++ __ beqz(copied_oop, L_store_element); + -+ __ ld(tmp_reg0, Address(s, 1 * unit)); -+ __ ld(tmp_reg1, Address(s, 2 * unit)); -+ __ ld(tmp_reg2, Address(s, 3 * unit)); -+ __ ld(tmp_reg3, Address(s, 4 * unit)); -+ __ addi(s, s, 4 * unit); ++ __ load_klass(r9_klass, copied_oop);// query the object klass ++ generate_type_check(r9_klass, ckoff, ckval, L_store_element); ++ // ======== end loop ======== + -+ __ sd(tmp_reg0, Address(d, 1 * unit)); -+ __ sd(tmp_reg1, Address(d, 2 * unit)); -+ __ sd(tmp_reg2, Address(d, 3 * unit)); -+ __ sd(tmp_reg3, Address(d, 4 * unit)); -+ __ addi(d, d, 4 * unit); ++ // It was a real error; we must depend on the caller to finish the job. ++ // Register count = remaining oops, count_orig = total oops. ++ // Emit GC store barriers for the oops we have copied and report ++ // their number to the caller. + -+ __ bind(L1); ++ __ sub(count, count_save, count); // K = partially copied oop count ++ __ xori(count, count, -1); // report (-1^K) to caller ++ __ beqz(count, L_done_pop); + -+ if (direction == copy_forwards) { -+ __ addi(s, s, bias); -+ __ addi(d, d, bias); -+ } ++ __ BIND(L_do_card_marks); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, t0, wb_post_saved_regs); + -+ __ andi(t0, count, 2); -+ __ beqz(t0, L2); -+ if (direction == copy_backwards) { -+ __ addi(s, s, 2 * unit); -+ __ ld(tmp_reg0, Address(s)); -+ __ ld(tmp_reg1, Address(s, wordSize)); -+ __ addi(d, d, 2 * unit); -+ __ sd(tmp_reg0, Address(d)); -+ __ sd(tmp_reg1, Address(d, wordSize)); -+ } else { -+ __ ld(tmp_reg0, Address(s)); -+ __ ld(tmp_reg1, Address(s, wordSize)); -+ __ addi(s, s, 2 * unit); -+ __ sd(tmp_reg0, Address(d)); -+ __ sd(tmp_reg1, Address(d, wordSize)); -+ __ addi(d, d, 2 * unit); -+ } -+ __ bind(L2); -+ } ++ __ bind(L_done_pop); ++ __ pop_reg(RegSet::of(x7, x9, x18, x19), sp); ++ inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); + ++ __ bind(L_done); ++ __ mv(x10, count); ++ __ leave(); + __ ret(); -+ } -+ -+ Label copy_f, copy_b; -+ -+ // All-singing all-dancing memory copy. -+ // -+ // Copy count units of memory from s to d. The size of a unit is -+ // step, which can be positive or negative depending on the direction -+ // of copy. If is_aligned is false, we align the source address. 
-+ // -+ /* -+ * if (is_aligned) { -+ * goto copy_8_bytes; -+ * } -+ * bool is_backwards = step < 0; -+ * int granularity = uabs(step); -+ * count = count * granularity; * count bytes -+ * -+ * if (is_backwards) { -+ * s += count; -+ * d += count; -+ * } -+ * -+ * count limit maybe greater than 16, for better performance -+ * if (count < 16) { -+ * goto copy_small; -+ * } -+ * -+ * if ((dst % 8) == (src % 8)) { -+ * aligned; -+ * goto copy8; -+ * } -+ * -+ * copy_small: -+ * load element one by one; -+ * done; -+ */ -+ -+ typedef void (MacroAssembler::*copy_insn)(Register Rd, const Address &adr, Register temp); -+ -+ void copy_memory_v(Register s, Register d, Register count, Register tmp, int step) { -+ bool is_backward = step < 0; -+ int granularity = uabs(step); -+ -+ const Register src = x30, dst = x31, vl = x14, cnt = x15, tmp1 = x16, tmp2 = x17; -+ assert_different_registers(s, d, cnt, vl, tmp, tmp1, tmp2); -+ Assembler::SEW sew = Assembler::elembytes_to_sew(granularity); -+ Label loop_forward, loop_backward, done; -+ -+ __ mv(dst, d); -+ __ mv(src, s); -+ __ mv(cnt, count); -+ -+ __ bind(loop_forward); -+ __ vsetvli(vl, cnt, sew, Assembler::m8); -+ if (is_backward) { -+ __ bne(vl, cnt, loop_backward); -+ } -+ -+ __ vlex_v(v0, src, sew); -+ __ sub(cnt, cnt, vl); -+ __ slli(vl, vl, (int)sew); -+ __ add(src, src, vl); -+ -+ __ vsex_v(v0, dst, sew); -+ __ add(dst, dst, vl); -+ __ bnez(cnt, loop_forward); -+ -+ if (is_backward) { -+ __ j(done); + -+ __ bind(loop_backward); -+ __ sub(tmp, cnt, vl); -+ __ slli(tmp, tmp, sew); -+ __ add(tmp1, s, tmp); -+ __ vlex_v(v0, tmp1, sew); -+ __ add(tmp2, d, tmp); -+ __ vsex_v(v0, tmp2, sew); -+ __ sub(cnt, cnt, vl); -+ __ bnez(cnt, loop_forward); -+ __ bind(done); -+ } ++ return start; + } + -+ void copy_memory(bool is_aligned, Register s, Register d, -+ Register count, Register tmp, int step) { -+ if (UseRVV) { -+ return copy_memory_v(s, d, count, tmp, step); -+ } -+ -+ bool is_backwards = step < 0; -+ int granularity = uabs(step); -+ -+ const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17; -+ -+ Label same_aligned; -+ Label copy8, copy_small, done; -+ -+ copy_insn ld_arr = NULL, st_arr = NULL; -+ switch (granularity) { -+ case 1 : -+ ld_arr = (copy_insn)&MacroAssembler::lbu; -+ st_arr = (copy_insn)&MacroAssembler::sb; -+ break; -+ case 2 : -+ ld_arr = (copy_insn)&MacroAssembler::lhu; -+ st_arr = (copy_insn)&MacroAssembler::sh; -+ break; -+ case 4 : -+ ld_arr = (copy_insn)&MacroAssembler::lwu; -+ st_arr = (copy_insn)&MacroAssembler::sw; -+ break; -+ case 8 : -+ ld_arr = (copy_insn)&MacroAssembler::ld; -+ st_arr = (copy_insn)&MacroAssembler::sd; -+ break; -+ default : -+ ShouldNotReachHere(); -+ } -+ -+ __ beqz(count, done); -+ __ slli(cnt, count, exact_log2(granularity)); -+ if (is_backwards) { -+ __ add(src, s, cnt); -+ __ add(dst, d, cnt); -+ } else { -+ __ mv(src, s); -+ __ mv(dst, d); -+ } -+ -+ if (is_aligned) { -+ __ addi(tmp, cnt, -8); -+ __ bgez(tmp, copy8); -+ __ j(copy_small); -+ } -+ -+ __ mv(tmp, 16); -+ __ blt(cnt, tmp, copy_small); -+ -+ __ xorr(tmp, src, dst); -+ __ andi(tmp, tmp, 0b111); -+ __ bnez(tmp, copy_small); -+ -+ __ bind(same_aligned); -+ __ andi(tmp, src, 0b111); -+ __ beqz(tmp, copy8); -+ if (is_backwards) { -+ __ addi(src, src, step); -+ __ addi(dst, dst, step); -+ } -+ (_masm->*ld_arr)(tmp3, Address(src), t0); -+ (_masm->*st_arr)(tmp3, Address(dst), t0); -+ if (!is_backwards) { -+ __ addi(src, src, step); -+ __ addi(dst, dst, step); -+ } -+ __ addi(cnt, cnt, -granularity); -+ __ beqz(cnt, done); -+ __ 
j(same_aligned); -+ -+ __ bind(copy8); -+ if (is_backwards) { -+ __ addi(src, src, -wordSize); -+ __ addi(dst, dst, -wordSize); -+ } -+ __ ld(tmp3, Address(src)); -+ __ sd(tmp3, Address(dst)); -+ if (!is_backwards) { -+ __ addi(src, src, wordSize); -+ __ addi(dst, dst, wordSize); -+ } -+ __ addi(cnt, cnt, -wordSize); -+ __ addi(tmp4, cnt, -8); -+ __ bgez(tmp4, copy8); // cnt >= 8, do next loop ++ // Perform range checks on the proposed arraycopy. ++ // Kills temp, but nothing else. ++ // Also, clean the sign bits of src_pos and dst_pos. ++ void arraycopy_range_checks(Register src, // source array oop (c_rarg0) ++ Register src_pos, // source position (c_rarg1) ++ Register dst, // destination array oo (c_rarg2) ++ Register dst_pos, // destination position (c_rarg3) ++ Register length, ++ Register temp, ++ Label& L_failed) { ++ BLOCK_COMMENT("arraycopy_range_checks:"); + -+ __ beqz(cnt, done); ++ assert_different_registers(t0, temp); + -+ __ bind(copy_small); -+ if (is_backwards) { -+ __ addi(src, src, step); -+ __ addi(dst, dst, step); -+ } -+ (_masm->*ld_arr)(tmp3, Address(src), t0); -+ (_masm->*st_arr)(tmp3, Address(dst), t0); -+ if (!is_backwards) { -+ __ addi(src, src, step); -+ __ addi(dst, dst, step); -+ } -+ __ addi(cnt, cnt, -granularity); -+ __ bgtz(cnt, copy_small); ++ // if [src_pos + length > arrayOop(src)->length()] then FAIL ++ __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes())); ++ __ addw(temp, length, src_pos); ++ __ bgtu(temp, t0, L_failed); + -+ __ bind(done); -+ } ++ // if [dst_pos + length > arrayOop(dst)->length()] then FAIL ++ __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes())); ++ __ addw(temp, length, dst_pos); ++ __ bgtu(temp, t0, L_failed); + -+ // Scan over array at a for count oops, verifying each one. -+ // Preserves a and count, clobbers t0 and t1. -+ void verify_oop_array(size_t size, Register a, Register count, Register temp) { -+ Label loop, end; -+ __ mv(t1, zr); -+ __ slli(t0, count, exact_log2(size)); -+ __ bind(loop); -+ __ bgeu(t1, t0, end); ++ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. ++ __ zero_extend(src_pos, src_pos, 32); ++ __ zero_extend(dst_pos, dst_pos, 32); + -+ __ add(temp, a, t1); -+ if (size == (size_t)wordSize) { -+ __ ld(temp, Address(temp, 0)); -+ __ verify_oop(temp); -+ } else { -+ __ lwu(temp, Address(temp, 0)); -+ __ decode_heap_oop(temp); // calls verify_oop -+ } -+ __ add(t1, t1, size); -+ __ j(loop); -+ __ bind(end); ++ BLOCK_COMMENT("arraycopy_range_checks done"); + } + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // is_oop - true => oop array, so generate store check code -+ // name - stub name string + // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // Generate 'unsafe' array copy stub ++ // Though just as safe as the other stubs, it takes an unscaled ++ // size_t argument instead of an element count. + // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomicly. ++ // Input: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - byte count, treated as ssize_t, can be zero + // -+ // Side Effects: -+ // disjoint_int_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_int_oop_copy(). 
++ // Examines the alignment of the operands and dispatches ++ // to a long, int, short, or byte copy loop. + // -+ address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address* entry, -+ const char* name, bool dest_uninitialized = false) { ++ address generate_unsafe_copy(const char* name, ++ address byte_copy_entry, ++ address short_copy_entry, ++ address int_copy_entry, ++ address long_copy_entry) { ++ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && ++ int_copy_entry != NULL && long_copy_entry != NULL); ++ Label L_long_aligned, L_int_aligned, L_short_aligned; + const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; -+ RegSet saved_reg = RegSet::of(s, d, count); ++ + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); -+ __ enter(); -+ -+ if (entry != NULL) { -+ *entry = __ pc(); -+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) -+ BLOCK_COMMENT("Entry:"); -+ } -+ -+ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; -+ if (dest_uninitialized) { -+ decorators |= IS_DEST_UNINITIALIZED; -+ } -+ if (aligned) { -+ decorators |= ARRAYCOPY_ALIGNED; -+ } -+ -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_reg); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame + -+ if (is_oop) { -+ // save regs before copy_memory -+ __ push_reg(RegSet::of(d, count), sp); -+ } ++ // bump this on entry, not on exit: ++ inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); + -+ { -+ // UnsafeCopyMemory page error: continue after ucm -+ bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); -+ UnsafeCopyMemoryMark ucmm(this, add_entry, true); -+ copy_memory(aligned, s, d, count, t0, size); -+ } ++ __ orr(t0, s, d); ++ __ orr(t0, t0, count); + -+ if (is_oop) { -+ __ pop_reg(RegSet::of(d, count), sp); -+ if (VerifyOops) { -+ verify_oop_array(size, d, count, t2); -+ } -+ } ++ __ andi(t0, t0, BytesPerLong - 1); ++ __ beqz(t0, L_long_aligned); ++ __ andi(t0, t0, BytesPerInt - 1); ++ __ beqz(t0, L_int_aligned); ++ __ andi(t0, t0, 1); ++ __ beqz(t0, L_short_aligned); ++ __ j(RuntimeAddress(byte_copy_entry)); + -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); ++ __ BIND(L_short_aligned); ++ __ srli(count, count, LogBytesPerShort); // size => short_count ++ __ j(RuntimeAddress(short_copy_entry)); ++ __ BIND(L_int_aligned); ++ __ srli(count, count, LogBytesPerInt); // size => int_count ++ __ j(RuntimeAddress(int_copy_entry)); ++ __ BIND(L_long_aligned); ++ __ srli(count, count, LogBytesPerLong); // size => long_count ++ __ j(RuntimeAddress(long_copy_entry)); + -+ __ leave(); -+ __ mv(x10, zr); // return 0 -+ __ ret(); + return start; + } + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // is_oop - true => oop array, so generate store check code -+ // name - stub name string + // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // Generate generic array copy stubs + // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomicly. 
++ // Input: ++ // c_rarg0 - src oop ++ // c_rarg1 - src_pos (32-bits) ++ // c_rarg2 - dst oop ++ // c_rarg3 - dst_pos (32-bits) ++ // c_rarg4 - element count (32-bits) + // -+ address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target, -+ address* entry, const char* name, -+ bool dest_uninitialized = false) { -+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; -+ RegSet saved_regs = RegSet::of(s, d, count); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); -+ __ enter(); ++ // Output: ++ // x10 == 0 - success ++ // x10 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_generic_copy(const char* name, ++ address byte_copy_entry, address short_copy_entry, ++ address int_copy_entry, address oop_copy_entry, ++ address long_copy_entry, address checkcast_copy_entry) { ++ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && ++ int_copy_entry != NULL && oop_copy_entry != NULL && ++ long_copy_entry != NULL && checkcast_copy_entry != NULL); ++ Label L_failed, L_failed_0, L_objArray; ++ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; + -+ if (entry != NULL) { -+ *entry = __ pc(); -+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) -+ BLOCK_COMMENT("Entry:"); -+ } ++ // Input registers ++ const Register src = c_rarg0; // source array oop ++ const Register src_pos = c_rarg1; // source position ++ const Register dst = c_rarg2; // destination array oop ++ const Register dst_pos = c_rarg3; // destination position ++ const Register length = c_rarg4; + -+ // use fwd copy when (d-s) above_equal (count*size) -+ __ sub(t0, d, s); -+ __ slli(t1, count, exact_log2(size)); -+ __ bgeu(t0, t1, nooverlap_target); ++ // Registers used as temps ++ const Register dst_klass = c_rarg5; + -+ DecoratorSet decorators = IN_HEAP | IS_ARRAY; -+ if (dest_uninitialized) { -+ decorators |= IS_DEST_UNINITIALIZED; -+ } -+ if (aligned) { -+ decorators |= ARRAYCOPY_ALIGNED; -+ } ++ __ align(CodeEntryAlignment); + -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_regs); ++ StubCodeMark mark(this, "StubRoutines", name); + -+ if (is_oop) { -+ // save regs before copy_memory -+ __ push_reg(RegSet::of(d, count), sp); -+ } -+ -+ { -+ // UnsafeCopyMemory page error: continue after ucm -+ bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); -+ UnsafeCopyMemoryMark ucmm(this, add_entry, true); -+ copy_memory(aligned, s, d, count, t0, -size); -+ } -+ -+ if (is_oop) { -+ __ pop_reg(RegSet::of(d, count), sp); -+ if (VerifyOops) { -+ verify_oop_array(size, d, count, t2); -+ } -+ } -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); -+ __ leave(); -+ __ mv(x10, zr); // return 0 -+ __ ret(); -+ return start; -+ } ++ address start = __ pc(); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, -+ // we let the hardware handle it. The one to eight bytes within words, -+ // dwords or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. 
-+ // -+ // Side Effects: -+ // disjoint_byte_copy_entry is set to the no-overlap entry point // -+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, -+ // we let the hardware handle it. The one to eight bytes within words, -+ // dwords or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ // Side Effects: -+ // disjoint_byte_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_byte_copy(). -+ // -+ address generate_disjoint_byte_copy(bool aligned, address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jbyte), aligned, not_oop, entry, name); -+ } ++ __ enter(); // required for proper stackwalking of RuntimeStub frame + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, -+ // we let the hardware handle it. The one to eight bytes within words, -+ // dwords or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ address generate_conjoint_byte_copy(bool aligned, address nooverlap_target, -+ address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jbyte), aligned, not_oop, nooverlap_target, entry, name); -+ } ++ // bump this on entry, not on exit: ++ inc_counter_np(SharedRuntime::_generic_array_copy_ctr); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we -+ // let the hardware handle it. The two or four words within dwords -+ // or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ // Side Effects: -+ // disjoint_short_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_short_copy(). -+ // -+ address generate_disjoint_short_copy(bool aligned, -+ address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jshort), aligned, not_oop, entry, name); -+ } ++ //----------------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the following conditions are met: ++ // ++ // (1) src and dst must not be null. ++ // (2) src_pos must not be negative. ++ // (3) dst_pos must not be negative. ++ // (4) length must not be negative. ++ // (5) src klass and dst klass should be the same and not NULL. ++ // (6) src and dst should be arrays. ++ // (7) src_pos + length must not exceed length of src. ++ // (8) dst_pos + length must not exceed length of dst. 
++ // + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we -+ // let the hardware handle it. The two or four words within dwords -+ // or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ address generate_conjoint_short_copy(bool aligned, address nooverlap_target, -+ address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jshort), aligned, not_oop, nooverlap_target, entry, name); -+ } ++ // if [src == NULL] then return -1 ++ __ beqz(src, L_failed); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomicly. -+ // -+ // Side Effects: -+ // disjoint_int_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_int_oop_copy(). -+ // -+ address generate_disjoint_int_copy(bool aligned, address* entry, -+ const char* name, bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name); -+ } ++ // if [src_pos < 0] then return -1 ++ // i.e. sign bit set ++ __ andi(t0, src_pos, 1UL << 31); ++ __ bnez(t0, L_failed); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomicly. -+ // -+ address generate_conjoint_int_copy(bool aligned, address nooverlap_target, -+ address* entry, const char* name, -+ bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jint), aligned, not_oop, nooverlap_target, entry, name); -+ } ++ // if [dst == NULL] then return -1 ++ __ beqz(dst, L_failed); + ++ // if [dst_pos < 0] then return -1 ++ // i.e. sign bit set ++ __ andi(t0, dst_pos, 1UL << 31); ++ __ bnez(t0, L_failed); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ // Side Effects: -+ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the -+ // no-overlap entry point used by generate_conjoint_long_oop_copy(). 
-+ // -+ address generate_disjoint_long_copy(bool aligned, address* entry, -+ const char* name, bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jlong), aligned, not_oop, entry, name); -+ } ++ // registers used as temp ++ const Register scratch_length = x28; // elements count to copy ++ const Register scratch_src_klass = x29; // array klass ++ const Register lh = x30; // layout helper + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ address generate_conjoint_long_copy(bool aligned, -+ address nooverlap_target, address* entry, -+ const char* name, bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jlong), aligned, not_oop, nooverlap_target, entry, name); -+ } ++ // if [length < 0] then return -1 ++ __ addw(scratch_length, length, zr); // length (elements count, 32-bits value) ++ // i.e. sign bit set ++ __ andi(t0, scratch_length, 1UL << 31); ++ __ bnez(t0, L_failed); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ // Side Effects: -+ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the -+ // no-overlap entry point used by generate_conjoint_long_oop_copy(). -+ // -+ address generate_disjoint_oop_copy(bool aligned, address* entry, -+ const char* name, bool dest_uninitialized) { -+ const bool is_oop = true; -+ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); -+ return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized); -+ } ++ __ load_klass(scratch_src_klass, src); ++#ifdef ASSERT ++ { ++ BLOCK_COMMENT("assert klasses not null {"); ++ Label L1, L2; ++ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL ++ __ bind(L1); ++ __ stop("broken null klass"); ++ __ bind(L2); ++ __ load_klass(t0, dst); ++ __ beqz(t0, L1); // this would be broken also ++ BLOCK_COMMENT("} assert klasses not null done"); ++ } ++#endif + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ address generate_conjoint_oop_copy(bool aligned, -+ address nooverlap_target, address* entry, -+ const char* name, bool dest_uninitialized) { -+ const bool is_oop = true; -+ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); -+ return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, -+ name, dest_uninitialized); -+ } ++ // Load layout helper (32-bits) ++ // ++ // |array_tag| | header_size | element_type | |log2_element_size| ++ // 32 30 24 16 8 2 0 ++ // ++ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 ++ // + -+ // Helper for generating a dynamic type check. -+ // Smashes t0, t1. 
-+ void generate_type_check(Register sub_klass, -+ Register super_check_offset, -+ Register super_klass, -+ Label& L_success) { -+ assert_different_registers(sub_klass, super_check_offset, super_klass); ++ const int lh_offset = in_bytes(Klass::layout_helper_offset()); + -+ BLOCK_COMMENT("type_check:"); ++ // Handle objArrays completely differently... ++ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ lw(lh, Address(scratch_src_klass, lh_offset)); ++ __ mvw(t0, objArray_lh); ++ __ beq(lh, t0, L_objArray); + -+ Label L_miss; ++ // if [src->klass() != dst->klass()] then return -1 ++ __ load_klass(t1, dst); ++ __ bne(t1, scratch_src_klass, L_failed); + -+ __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, super_check_offset); -+ __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL); ++ // if [src->is_Array() != NULL] then return -1 ++ // i.e. (lh >= 0) ++ __ andi(t0, lh, 1UL << 31); ++ __ beqz(t0, L_failed); + -+ // Fall through on failure! -+ __ BIND(L_miss); -+ } ++ // At this point, it is known to be a typeArray (array_tag 0x3). ++#ifdef ASSERT ++ { ++ BLOCK_COMMENT("assert primitive array {"); ++ Label L; ++ __ mvw(t1, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift); ++ __ bge(lh, t1, L); ++ __ stop("must be a primitive array"); ++ __ bind(L); ++ BLOCK_COMMENT("} assert primitive array done"); ++ } ++#endif + -+ // -+ // Generate checkcasting array copy stub -+ // -+ // Input: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // c_rarg3 - size_t ckoff (super_check_offset) -+ // c_rarg4 - oop ckval (super_klass) -+ // -+ // Output: -+ // x10 == 0 - success -+ // x10 == -1^K - failure, where K is partial transfer count -+ // -+ address generate_checkcast_copy(const char* name, address* entry, -+ bool dest_uninitialized = false) { -+ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, ++ t1, L_failed); + -+ // Input registers (after setup_arg_regs) -+ const Register from = c_rarg0; // source array address -+ const Register to = c_rarg1; // destination array address -+ const Register count = c_rarg2; // elementscount -+ const Register ckoff = c_rarg3; // super_check_offset -+ const Register ckval = c_rarg4; // super_klass ++ // TypeArrayKlass ++ // ++ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize) ++ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize) ++ // + -+ RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4); -+ RegSet wb_post_saved_regs = RegSet::of(count); ++ const Register t0_offset = t0; // array offset ++ const Register x22_elsize = lh; // element size + -+ // Registers used as temps (x7, x9, x18 are save-on-entry) -+ const Register count_save = x19; // orig elementscount -+ const Register start_to = x18; // destination array start address -+ const Register copied_oop = x7; // actual oop copied -+ const Register r9_klass = x9; // oop._klass ++ // Get array_header_in_bytes() ++ int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); ++ int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; ++ __ slli(t0_offset, lh, XLEN - lh_header_size_msb); // left shift to remove 24 ~ 32; ++ __ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset + -+ 
//--------------------------------------------------------------- -+ // Assembler stub will be used for this call to arraycopy -+ // if the two arrays are subtypes of Object[] but the -+ // destination array type is not equal to or a supertype -+ // of the source type. Each element must be separately -+ // checked. ++ __ add(src, src, t0_offset); // src array offset ++ __ add(dst, dst, t0_offset); // dst array offset ++ BLOCK_COMMENT("choose copy loop based on element size"); + -+ assert_different_registers(from, to, count, ckoff, ckval, start_to, -+ copied_oop, r9_klass, count_save); ++ // next registers should be set before the jump to corresponding stub ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register count = c_rarg2; // elements count + -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); ++ // 'from', 'to', 'count' registers should be set in such order ++ // since they are the same as 'src', 'src_pos', 'dst'. + -+ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); + -+ // Caller of this entry point must set up the argument registers. -+ if (entry != NULL) { -+ *entry = __ pc(); -+ BLOCK_COMMENT("Entry:"); -+ } ++ // The possible values of elsize are 0-3, i.e. exact_log2(element ++ // size in bytes). We do a simple bitwise binary search. ++ __ BIND(L_copy_bytes); ++ __ andi(t0, x22_elsize, 2); ++ __ bnez(t0, L_copy_ints); ++ __ andi(t0, x22_elsize, 1); ++ __ bnez(t0, L_copy_shorts); ++ __ add(from, src, src_pos); // src_addr ++ __ add(to, dst, dst_pos); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(byte_copy_entry)); + -+ // Empty array: Nothing to do -+ __ beqz(count, L_done); ++ __ BIND(L_copy_shorts); ++ __ shadd(from, src_pos, src, t0, 1); // src_addr ++ __ shadd(to, dst_pos, dst, t0, 1); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(short_copy_entry)); + -+ __ push_reg(RegSet::of(x7, x9, x18, x19), sp); ++ __ BIND(L_copy_ints); ++ __ andi(t0, x22_elsize, 1); ++ __ bnez(t0, L_copy_longs); ++ __ shadd(from, src_pos, src, t0, 2); // src_addr ++ __ shadd(to, dst_pos, dst, t0, 2); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(int_copy_entry)); + ++ __ BIND(L_copy_longs); +#ifdef ASSERT -+ BLOCK_COMMENT("assert consistent ckoff/ckval"); -+ // The ckoff and ckval must be mutually consistent, -+ // even though caller generates both. 
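
The checkcast copy stub above reports success as 0 and failure as -1^K, where K is the number of elements copied before the failing type check. A minimal caller-side decode, as a C++ sketch with invented function names:

    #include <cstdio>

    // 0 means every element was copied; any other value is -1^K, where K is the
    // count of elements copied before the element-wise type check failed.
    static bool copy_failed(int stub_result)   { return stub_result != 0; }
    static int  partial_count(int stub_result) { return stub_result ^ -1; } // == ~stub_result

    int main() {
      int failed_after_5 = -1 ^ 5;                      // what the stub would return
      printf("failed=%d\n", copy_failed(0));            // 0: full copy
      printf("K=%d\n", partial_count(failed_after_5));  // 5
      return 0;
    }
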
-+ { Label L; -+ int sco_offset = in_bytes(Klass::super_check_offset_offset()); -+ __ lwu(start_to, Address(ckval, sco_offset)); -+ __ beq(ckoff, start_to, L); -+ __ stop("super_check_offset inconsistent"); ++ { ++ BLOCK_COMMENT("assert long copy {"); ++ Label L; ++ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> x22_elsize ++ __ addw(lh, lh, zr); ++ __ mvw(t0, LogBytesPerLong); ++ __ beq(x22_elsize, t0, L); ++ __ stop("must be long copy, but elsize is wrong"); + __ bind(L); ++ BLOCK_COMMENT("} assert long copy done"); + } -+#endif //ASSERT -+ -+ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; -+ bool is_oop = true; -+ if (dest_uninitialized) { -+ decorators |= IS_DEST_UNINITIALIZED; -+ } ++#endif ++ __ shadd(from, src_pos, src, t0, 3); // src_addr ++ __ shadd(to, dst_pos, dst, t0, 3); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(long_copy_entry)); + -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs); ++ // ObjArrayKlass ++ __ BIND(L_objArray); ++ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] + -+ // save the original count -+ __ mv(count_save, count); ++ Label L_plain_copy, L_checkcast_copy; ++ // test array classes for subtyping ++ __ load_klass(t2, dst); ++ __ bne(scratch_src_klass, t2, L_checkcast_copy); // usual case is exact equality + -+ // Copy from low to high addresses -+ __ mv(start_to, to); // Save destination array start address -+ __ j(L_load_element); ++ // Identically typed arrays can be copied without element-wise checks. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, ++ t1, L_failed); + -+ // ======== begin loop ======== -+ // (Loop is rotated; its entry is L_load_element.) -+ // Loop control: -+ // for count to 0 do -+ // copied_oop = load_heap_oop(from++) -+ // ... generate_type_check ... -+ // store_heap_oop(to++, copied_oop) -+ // end ++ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); ++ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); ++ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ addw(count, scratch_length, zr); // length ++ __ BIND(L_plain_copy); ++ __ j(RuntimeAddress(oop_copy_entry)); + -+ __ align(OptoLoopAlignment); ++ __ BIND(L_checkcast_copy); ++ // live at this point: scratch_src_klass, scratch_length, t2 (dst_klass) ++ { ++ // Before looking at dst.length, make sure dst is also an objArray. ++ __ lwu(t0, Address(t2, lh_offset)); ++ __ mvw(t1, objArray_lh); ++ __ bne(t0, t1, L_failed); + -+ __ BIND(L_store_element); -+ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, AS_RAW); // store the oop -+ __ add(to, to, UseCompressedOops ? 4 : 8); -+ __ sub(count, count, 1); -+ __ beqz(count, L_do_card_marks); ++ // It is safe to examine both src.length and dst.length. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, ++ t2, L_failed); + -+ // ======== loop entry is here ======== -+ __ BIND(L_load_element); -+ __ load_heap_oop(copied_oop, Address(from, 0), noreg, noreg, AS_RAW); // load the oop -+ __ add(from, from, UseCompressedOops ? 
4 : 8); -+ __ beqz(copied_oop, L_store_element); -+ -+ __ load_klass(r9_klass, copied_oop);// query the object klass -+ generate_type_check(r9_klass, ckoff, ckval, L_store_element); -+ // ======== end loop ======== -+ -+ // It was a real error; we must depend on the caller to finish the job. -+ // Register count = remaining oops, count_orig = total oops. -+ // Emit GC store barriers for the oops we have copied and report -+ // their number to the caller. -+ -+ __ sub(count, count_save, count); // K = partially copied oop count -+ __ xori(count, count, -1); // report (-1^K) to caller -+ __ beqz(count, L_done_pop); -+ -+ __ BIND(L_do_card_marks); -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, t0, wb_post_saved_regs); -+ -+ __ bind(L_done_pop); -+ __ pop_reg(RegSet::of(x7, x9, x18, x19), sp); -+ inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); -+ -+ __ bind(L_done); -+ __ mv(x10, count); -+ __ leave(); -+ __ ret(); -+ -+ return start; -+ } -+ -+ // Perform range checks on the proposed arraycopy. -+ // Kills temp, but nothing else. -+ // Also, clean the sign bits of src_pos and dst_pos. -+ void arraycopy_range_checks(Register src, // source array oop (c_rarg0) -+ Register src_pos, // source position (c_rarg1) -+ Register dst, // destination array oo (c_rarg2) -+ Register dst_pos, // destination position (c_rarg3) -+ Register length, -+ Register temp, -+ Label& L_failed) { -+ BLOCK_COMMENT("arraycopy_range_checks:"); -+ -+ assert_different_registers(t0, temp); -+ -+ // if [src_pos + length > arrayOop(src)->length()] then FAIL -+ __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes())); -+ __ addw(temp, length, src_pos); -+ __ bgtu(temp, t0, L_failed); -+ -+ // if [dst_pos + length > arrayOop(dst)->length()] then FAIL -+ __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes())); -+ __ addw(temp, length, dst_pos); -+ __ bgtu(temp, t0, L_failed); -+ -+ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. -+ __ zero_extend(src_pos, src_pos, 32); -+ __ zero_extend(dst_pos, dst_pos, 32); -+ -+ BLOCK_COMMENT("arraycopy_range_checks done"); -+ } -+ -+ // -+ // Generate 'unsafe' array copy stub -+ // Though just as safe as the other stubs, it takes an unscaled -+ // size_t argument instead of an element count. -+ // -+ // Input: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - byte count, treated as ssize_t, can be zero -+ // -+ // Examines the alignment of the operands and dispatches -+ // to a long, int, short, or byte copy loop. 
-+ // -+ address generate_unsafe_copy(const char* name, -+ address byte_copy_entry, -+ address short_copy_entry, -+ address int_copy_entry, -+ address long_copy_entry) { -+ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && -+ int_copy_entry != NULL && long_copy_entry != NULL); -+ Label L_long_aligned, L_int_aligned, L_short_aligned; -+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; -+ -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); -+ __ enter(); // required for proper stackwalking of RuntimeStub frame -+ -+ // bump this on entry, not on exit: -+ inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); -+ -+ __ orr(t0, s, d); -+ __ orr(t0, t0, count); -+ -+ __ andi(t0, t0, BytesPerLong - 1); -+ __ beqz(t0, L_long_aligned); -+ __ andi(t0, t0, BytesPerInt - 1); -+ __ beqz(t0, L_int_aligned); -+ __ andi(t0, t0, 1); -+ __ beqz(t0, L_short_aligned); -+ __ j(RuntimeAddress(byte_copy_entry)); -+ -+ __ BIND(L_short_aligned); -+ __ srli(count, count, LogBytesPerShort); // size => short_count -+ __ j(RuntimeAddress(short_copy_entry)); -+ __ BIND(L_int_aligned); -+ __ srli(count, count, LogBytesPerInt); // size => int_count -+ __ j(RuntimeAddress(int_copy_entry)); -+ __ BIND(L_long_aligned); -+ __ srli(count, count, LogBytesPerLong); // size => long_count -+ __ j(RuntimeAddress(long_copy_entry)); -+ -+ return start; -+ } -+ -+ // -+ // Generate generic array copy stubs -+ // -+ // Input: -+ // c_rarg0 - src oop -+ // c_rarg1 - src_pos (32-bits) -+ // c_rarg2 - dst oop -+ // c_rarg3 - dst_pos (32-bits) -+ // c_rarg4 - element count (32-bits) -+ // -+ // Output: -+ // x10 == 0 - success -+ // x10 == -1^K - failure, where K is partial transfer count -+ // -+ address generate_generic_copy(const char* name, -+ address byte_copy_entry, address short_copy_entry, -+ address int_copy_entry, address oop_copy_entry, -+ address long_copy_entry, address checkcast_copy_entry) { -+ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && -+ int_copy_entry != NULL && oop_copy_entry != NULL && -+ long_copy_entry != NULL && checkcast_copy_entry != NULL); -+ Label L_failed, L_failed_0, L_objArray; -+ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; -+ -+ // Input registers -+ const Register src = c_rarg0; // source array oop -+ const Register src_pos = c_rarg1; // source position -+ const Register dst = c_rarg2; // destination array oop -+ const Register dst_pos = c_rarg3; // destination position -+ const Register length = c_rarg4; -+ -+ // Registers used as temps -+ const Register dst_klass = c_rarg5; -+ -+ __ align(CodeEntryAlignment); -+ -+ StubCodeMark mark(this, "StubRoutines", name); -+ -+ address start = __ pc(); -+ -+ __ enter(); // required for proper stackwalking of RuntimeStub frame -+ -+ // bump this on entry, not on exit: -+ inc_counter_np(SharedRuntime::_generic_array_copy_ctr); -+ -+ //----------------------------------------------------------------------- -+ // Assembler stub will be used for this call to arraycopy -+ // if the following conditions are met: -+ // -+ // (1) src and dst must not be null. -+ // (2) src_pos must not be negative. -+ // (3) dst_pos must not be negative. -+ // (4) length must not be negative. -+ // (5) src klass and dst klass should be the same and not NULL. -+ // (6) src and dst should be arrays. -+ // (7) src_pos + length must not exceed length of src. -+ // (8) dst_pos + length must not exceed length of dst. 
-+ // -+ -+ // if [src == NULL] then return -1 -+ __ beqz(src, L_failed); -+ -+ // if [src_pos < 0] then return -1 -+ // i.e. sign bit set -+ __ andi(t0, src_pos, 1UL << 31); -+ __ bnez(t0, L_failed); -+ -+ // if [dst == NULL] then return -1 -+ __ beqz(dst, L_failed); -+ -+ // if [dst_pos < 0] then return -1 -+ // i.e. sign bit set -+ __ andi(t0, dst_pos, 1UL << 31); -+ __ bnez(t0, L_failed); -+ -+ // registers used as temp -+ const Register scratch_length = x28; // elements count to copy -+ const Register scratch_src_klass = x29; // array klass -+ const Register lh = x30; // layout helper -+ -+ // if [length < 0] then return -1 -+ __ addw(scratch_length, length, zr); // length (elements count, 32-bits value) -+ // i.e. sign bit set -+ __ andi(t0, scratch_length, 1UL << 31); -+ __ bnez(t0, L_failed); -+ -+ __ load_klass(scratch_src_klass, src); -+#ifdef ASSERT -+ { -+ BLOCK_COMMENT("assert klasses not null {"); -+ Label L1, L2; -+ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL -+ __ bind(L1); -+ __ stop("broken null klass"); -+ __ bind(L2); -+ __ load_klass(t0, dst); -+ __ beqz(t0, L1); // this would be broken also -+ BLOCK_COMMENT("} assert klasses not null done"); -+ } -+#endif -+ -+ // Load layout helper (32-bits) -+ // -+ // |array_tag| | header_size | element_type | |log2_element_size| -+ // 32 30 24 16 8 2 0 -+ // -+ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 -+ // -+ -+ const int lh_offset = in_bytes(Klass::layout_helper_offset()); -+ -+ // Handle objArrays completely differently... -+ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); -+ __ lw(lh, Address(scratch_src_klass, lh_offset)); -+ __ mvw(t0, objArray_lh); -+ __ beq(lh, t0, L_objArray); -+ -+ // if [src->klass() != dst->klass()] then return -1 -+ __ load_klass(t1, dst); -+ __ bne(t1, scratch_src_klass, L_failed); -+ -+ // if [src->is_Array() != NULL] then return -1 -+ // i.e. (lh >= 0) -+ __ andi(t0, lh, 1UL << 31); -+ __ beqz(t0, L_failed); -+ -+ // At this point, it is known to be a typeArray (array_tag 0x3). 
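
The numbered conditions listed above map onto the checks implemented here. Restated as a simplified, illustrative C++ predicate with made-up types (the objArray subtype case handled later in the stub is not modeled):

    #include <cstdint>
    #include <cstdio>

    // The stub returns -1 to its caller when any condition fails and lets the
    // runtime raise the proper exception. "klass" stands in for the array klass
    // pointer; condition (6), "both are arrays", is implied here by giving every
    // ArrayRef a klass and a length.
    struct ArrayRef {
      const void* klass;
      int32_t     length;
    };

    static bool generic_copy_allowed(const ArrayRef* src, int32_t src_pos,
                                     const ArrayRef* dst, int32_t dst_pos,
                                     int32_t length) {
      if (src == nullptr || dst == nullptr) return false;                  // (1)
      if (src_pos < 0 || dst_pos < 0 || length < 0) return false;          // (2)(3)(4)
      if (src->klass == nullptr || src->klass != dst->klass) return false; // (5)
      if ((int64_t)src_pos + length > src->length) return false;           // (7)
      if ((int64_t)dst_pos + length > dst->length) return false;           // (8)
      return true;
    }

    int main() {
      ArrayRef a = { &a, 10 };
      ArrayRef b = { &a, 4 };                                // same dummy klass
      printf("%d\n", generic_copy_allowed(&a, 0, &b, 0, 4)); // 1
      printf("%d\n", generic_copy_allowed(&a, 8, &b, 0, 4)); // 0: violates (7)
      return 0;
    }
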
-+#ifdef ASSERT -+ { -+ BLOCK_COMMENT("assert primitive array {"); -+ Label L; -+ __ mvw(t1, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift); -+ __ bge(lh, t1, L); -+ __ stop("must be a primitive array"); -+ __ bind(L); -+ BLOCK_COMMENT("} assert primitive array done"); -+ } -+#endif -+ -+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, -+ t1, L_failed); -+ -+ // TypeArrayKlass -+ // -+ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize) -+ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize) -+ // -+ -+ const Register t0_offset = t0; // array offset -+ const Register x22_elsize = lh; // element size -+ -+ // Get array_header_in_bytes() -+ int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); -+ int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; -+ __ slli(t0_offset, lh, XLEN - lh_header_size_msb); // left shift to remove 24 ~ 32; -+ __ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset -+ -+ __ add(src, src, t0_offset); // src array offset -+ __ add(dst, dst, t0_offset); // dst array offset -+ BLOCK_COMMENT("choose copy loop based on element size"); -+ -+ // next registers should be set before the jump to corresponding stub -+ const Register from = c_rarg0; // source array address -+ const Register to = c_rarg1; // destination array address -+ const Register count = c_rarg2; // elements count -+ -+ // 'from', 'to', 'count' registers should be set in such order -+ // since they are the same as 'src', 'src_pos', 'dst'. -+ -+ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); -+ -+ // The possible values of elsize are 0-3, i.e. exact_log2(element -+ // size in bytes). We do a simple bitwise binary search. 
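
The "bitwise binary search" mentioned above picks one of four copy loops from log2(element size) with two bit tests per path rather than a chain of compares. The branch structure that follows is equivalent to this C++ sketch (enum and function names invented for illustration):

    #include <cstdio>

    enum CopyKind { BYTE_COPY, SHORT_COPY, INT_COPY, LONG_COPY };

    // elsize_log2 is 0..3; test bit 1 first, then bit 0, matching the andi/bnez
    // chain through L_copy_bytes, L_copy_shorts, L_copy_ints and L_copy_longs.
    static CopyKind select_copy(int elsize_log2) {
      if ((elsize_log2 & 2) == 0) {
        return (elsize_log2 & 1) ? SHORT_COPY : BYTE_COPY;
      }
      return (elsize_log2 & 1) ? LONG_COPY : INT_COPY;
    }

    int main() {
      for (int i = 0; i < 4; i++) {
        printf("log2=%d -> kind=%d\n", i, select_copy(i));
      }
      return 0;
    }
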
-+ __ BIND(L_copy_bytes); -+ __ andi(t0, x22_elsize, 2); -+ __ bnez(t0, L_copy_ints); -+ __ andi(t0, x22_elsize, 1); -+ __ bnez(t0, L_copy_shorts); -+ __ add(from, src, src_pos); // src_addr -+ __ add(to, dst, dst_pos); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(byte_copy_entry)); -+ -+ __ BIND(L_copy_shorts); -+ __ shadd(from, src_pos, src, t0, 1); // src_addr -+ __ shadd(to, dst_pos, dst, t0, 1); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(short_copy_entry)); -+ -+ __ BIND(L_copy_ints); -+ __ andi(t0, x22_elsize, 1); -+ __ bnez(t0, L_copy_longs); -+ __ shadd(from, src_pos, src, t0, 2); // src_addr -+ __ shadd(to, dst_pos, dst, t0, 2); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(int_copy_entry)); -+ -+ __ BIND(L_copy_longs); -+#ifdef ASSERT -+ { -+ BLOCK_COMMENT("assert long copy {"); -+ Label L; -+ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> x22_elsize -+ __ addw(lh, lh, zr); -+ __ mvw(t0, LogBytesPerLong); -+ __ beq(x22_elsize, t0, L); -+ __ stop("must be long copy, but elsize is wrong"); -+ __ bind(L); -+ BLOCK_COMMENT("} assert long copy done"); -+ } -+#endif -+ __ shadd(from, src_pos, src, t0, 3); // src_addr -+ __ shadd(to, dst_pos, dst, t0, 3); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(long_copy_entry)); -+ -+ // ObjArrayKlass -+ __ BIND(L_objArray); -+ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] -+ -+ Label L_plain_copy, L_checkcast_copy; -+ // test array classes for subtyping -+ __ load_klass(t2, dst); -+ __ bne(scratch_src_klass, t2, L_checkcast_copy); // usual case is exact equality -+ -+ // Identically typed arrays can be copied without element-wise checks. -+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, -+ t1, L_failed); -+ -+ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); -+ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); -+ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); -+ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); -+ __ addw(count, scratch_length, zr); // length -+ __ BIND(L_plain_copy); -+ __ j(RuntimeAddress(oop_copy_entry)); -+ -+ __ BIND(L_checkcast_copy); -+ // live at this point: scratch_src_klass, scratch_length, t2 (dst_klass) -+ { -+ // Before looking at dst.length, make sure dst is also an objArray. -+ __ lwu(t0, Address(t2, lh_offset)); -+ __ mvw(t1, objArray_lh); -+ __ bne(t0, t1, L_failed); -+ -+ // It is safe to examine both src.length and dst.length. -+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, -+ t2, L_failed); -+ -+ __ load_klass(dst_klass, dst); // reload ++ __ load_klass(dst_klass, dst); // reload + + // Marshal the base address arguments now, freeing registers. 
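
The shadd/add pairs that follow marshal the from/to arguments: each computes base + header_offset + (index << log2_element_size). In C++ terms, as a sketch with assumed parameter names (LogBytesPerHeapOop is 2 with compressed oops and 3 otherwise):

    #include <cstddef>
    #include <cstdio>

    // Address of element 'pos' in an array whose first element starts
    // 'base_offset' bytes past the oop, with elements of 1 << log2_elem_size bytes.
    static inline void* element_address(void* array_oop, size_t base_offset,
                                        size_t pos, unsigned log2_elem_size) {
      return static_cast<char*>(array_oop) + base_offset + (pos << log2_elem_size);
    }

    int main() {
      char fake_array[64] = {0};
      // e.g. a 16-byte header and element 3 of a 4-byte-element array -> offset 28
      void* p = element_address(fake_array, 16, 3, 2);
      printf("offset=%ld\n", (long)(static_cast<char*>(p) - fake_array));
      return 0;
    }
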
+ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); @@ -48206,50 +43491,6 @@ index 00000000000..b3fdd04db1b + return entry; + } + -+ address generate_method_entry_barrier() { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier"); -+ -+ Label deoptimize_label; -+ -+ address start = __ pc(); -+ -+ __ set_last_Java_frame(sp, fp, ra, t0); -+ -+ __ enter(); -+ __ add(t1, sp, wordSize); -+ -+ __ sub(sp, sp, 4 * wordSize); -+ -+ __ push_call_clobbered_registers(); -+ -+ __ mv(c_rarg0, t1); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetNMethod::nmethod_stub_entry_barrier), 1); -+ -+ __ reset_last_Java_frame(true); -+ -+ __ mv(t0, x10); -+ -+ __ pop_call_clobbered_registers(); -+ -+ __ bnez(t0, deoptimize_label); -+ -+ __ leave(); -+ __ ret(); -+ -+ __ BIND(deoptimize_label); -+ -+ __ ld(t0, Address(sp, 0)); -+ __ ld(fp, Address(sp, wordSize)); -+ __ ld(ra, Address(sp, wordSize * 2)); -+ __ ld(t1, Address(sp, wordSize * 3)); -+ -+ __ mv(sp, t0); -+ __ jr(t1); -+ -+ return start; -+ } -+ + // x10 = result + // x11 = str1 + // x12 = cnt1 @@ -48686,111 +43927,6 @@ index 00000000000..b3fdd04db1b + + return entry; + } -+ -+ // Arguments: -+ // -+ // Input: -+ // c_rarg0 - newArr address -+ // c_rarg1 - oldArr address -+ // c_rarg2 - newIdx -+ // c_rarg3 - shiftCount -+ // c_rarg4 - numIter -+ // -+ address generate_bigIntegerLeftShift() { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker"); -+ address entry = __ pc(); -+ -+ Label loop, exit; -+ -+ Register newArr = c_rarg0; -+ Register oldArr = c_rarg1; -+ Register newIdx = c_rarg2; -+ Register shiftCount = c_rarg3; -+ Register numIter = c_rarg4; -+ -+ Register shiftRevCount = c_rarg5; -+ Register oldArrNext = t1; -+ -+ __ beqz(numIter, exit); -+ __ shadd(newArr, newIdx, newArr, t0, 2); -+ -+ __ li(shiftRevCount, 32); -+ __ sub(shiftRevCount, shiftRevCount, shiftCount); -+ -+ __ bind(loop); -+ __ addi(oldArrNext, oldArr, 4); -+ __ vsetvli(t0, numIter, Assembler::e32, Assembler::m4); -+ __ vle32_v(v0, oldArr); -+ __ vle32_v(v4, oldArrNext); -+ __ vsll_vx(v0, v0, shiftCount); -+ __ vsrl_vx(v4, v4, shiftRevCount); -+ __ vor_vv(v0, v0, v4); -+ __ vse32_v(v0, newArr); -+ __ sub(numIter, numIter, t0); -+ __ shadd(oldArr, t0, oldArr, t1, 2); -+ __ shadd(newArr, t0, newArr, t1, 2); -+ __ bnez(numIter, loop); -+ -+ __ bind(exit); -+ __ ret(); -+ -+ return entry; -+ } -+ -+ // Arguments: -+ // -+ // Input: -+ // c_rarg0 - newArr address -+ // c_rarg1 - oldArr address -+ // c_rarg2 - newIdx -+ // c_rarg3 - shiftCount -+ // c_rarg4 - numIter -+ // -+ address generate_bigIntegerRightShift() { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker"); -+ address entry = __ pc(); -+ -+ Label loop, exit; -+ -+ Register newArr = c_rarg0; -+ Register oldArr = c_rarg1; -+ Register newIdx = c_rarg2; -+ Register shiftCount = c_rarg3; -+ Register numIter = c_rarg4; -+ Register idx = numIter; -+ -+ Register shiftRevCount = c_rarg5; -+ Register oldArrNext = c_rarg6; -+ Register newArrCur = t0; -+ Register oldArrCur = t1; -+ -+ __ beqz(idx, exit); -+ __ shadd(newArr, newIdx, newArr, t0, 2); -+ -+ __ li(shiftRevCount, 32); -+ __ sub(shiftRevCount, shiftRevCount, shiftCount); -+ -+ __ bind(loop); -+ __ vsetvli(t0, idx, Assembler::e32, Assembler::m4); -+ __ sub(idx, idx, t0); -+ __ shadd(oldArrNext, idx, oldArr, t1, 2); -+ __ shadd(newArrCur, idx, newArr, t1, 2); -+ __ addi(oldArrCur, oldArrNext, 4); -+ __ vle32_v(v0, 
oldArrCur); -+ __ vle32_v(v4, oldArrNext); -+ __ vsrl_vx(v0, v0, shiftCount); -+ __ vsll_vx(v4, v4, shiftRevCount); -+ __ vor_vv(v0, v0, v4); -+ __ vse32_v(v0, newArrCur); -+ __ bnez(idx, loop); -+ -+ __ bind(exit); -+ __ ret(); -+ -+ return entry; -+ } +#endif + +#ifdef COMPILER2 @@ -49656,22 +44792,12 @@ index 00000000000..b3fdd04db1b + MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); + StubRoutines::_montgomerySquare = g.generate_square(); + } -+ -+ if (UseRVVForBigIntegerShiftIntrinsics) { -+ StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift(); -+ StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift(); -+ } +#endif + + generate_compare_long_strings(); + + generate_string_indexof_stubs(); + -+ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); -+ if (bs_nm != NULL) { -+ StubRoutines::riscv::_method_entry_barrier = generate_method_entry_barrier(); -+ } -+ + StubRoutines::riscv::set_completed(); + } + @@ -49687,20 +44813,15 @@ index 00000000000..b3fdd04db1b + ~StubGenerator() {} +}; // end class declaration + -+#define UCM_TABLE_MAX_ENTRIES 8 +void StubGenerator_generate(CodeBuffer* code, bool all) { -+ if (UnsafeCopyMemory::_table == NULL) { -+ UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); -+ } -+ + StubGenerator g(code, all); +} diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp new file mode 100644 -index 00000000000..395a2d338e4 +index 0000000000..9202d9ec4b --- /dev/null +++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -@@ -0,0 +1,58 @@ +@@ -0,0 +1,57 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. @@ -49756,15 +44877,14 @@ index 00000000000..395a2d338e4 +address StubRoutines::riscv::_string_indexof_linear_uu = NULL; +address StubRoutines::riscv::_string_indexof_linear_ul = NULL; +address StubRoutines::riscv::_large_byte_array_inflate = NULL; -+address StubRoutines::riscv::_method_entry_barrier = NULL; + +bool StubRoutines::riscv::_completed = false; diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp new file mode 100644 -index 00000000000..51f07819c33 +index 0000000000..0c9445e18a --- /dev/null +++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -@@ -0,0 +1,161 @@ +@@ -0,0 +1,155 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. @@ -49834,8 +44954,6 @@ index 00000000000..51f07819c33 + static address _string_indexof_linear_ul; + static address _large_byte_array_inflate; + -+ static address _method_entry_barrier; -+ + static bool _completed; + + public: @@ -49912,10 +45030,6 @@ index 00000000000..51f07819c33 + return _large_byte_array_inflate; + } + -+ static address method_entry_barrier() { -+ return _method_entry_barrier; -+ } -+ + static bool complete() { + return _completed; + } @@ -49928,10 +45042,10 @@ index 00000000000..51f07819c33 +#endif // CPU_RISCV_STUBROUTINES_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp new file mode 100644 -index 00000000000..6537b2dbd94 +index 0000000000..e639fa7e12 --- /dev/null +++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -0,0 +1,1794 @@ +@@ -0,0 +1,1833 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. @@ -49985,7 +45099,6 @@ index 00000000000..6537b2dbd94 +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" -+#include "utilities/powerOfTwo.hpp" +#include + +#ifndef PRODUCT @@ -50491,31 +45604,81 @@ index 00000000000..6537b2dbd94 +// +// xmethod: method +// -+void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) { ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { + Label done; + // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. -+ int increment = InvocationCounter::count_increment; -+ Label no_mdo; -+ if (ProfileInterpreter) { -+ // Are we profiling? -+ __ ld(x10, Address(xmethod, Method::method_data_offset())); -+ __ beqz(x10, no_mdo); -+ // Increment counter in the MDO -+ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); -+ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); -+ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); -+ __ j(done); ++ if (TieredCompilation) { ++ int increment = InvocationCounter::count_increment; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld(x10, Address(xmethod, Method::method_data_offset())); ++ __ beqz(x10, no_mdo); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); ++ __ j(done); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(t1, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(xmethod, t1, done); ++ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); ++ __ bind(done); ++ } else { // not TieredCompilation ++ const Address backedge_counter(t1, ++ MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset()); ++ const Address invocation_counter(t1, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ ++ __ get_method_counters(xmethod, t1, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into MethodData* ++ __ lwu(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); ++ __ addw(x11, x11, 1); ++ __ sw(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ lwu(x11, invocation_counter); ++ __ lwu(x10, backedge_counter); ++ ++ __ addw(x11, x11, InvocationCounter::count_increment); ++ __ andi(x10, x10, InvocationCounter::count_mask_value); ++ ++ __ sw(x11, invocation_counter); ++ __ addw(x10, x10, x11); // add both counters ++ ++ // profile_method is non-null only for interpreted method so ++ // profile_method != NULL == !native_call ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ 
__ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); ++ __ blt(x10, t1, *profile_method_continue); ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(t1, *profile_method); ++ } ++ ++ { ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); ++ __ bltu(x10, t1, done); ++ __ j(*overflow); ++ } ++ __ bind(done); + } -+ __ bind(no_mdo); -+ // Increment counter in MethodCounters -+ const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); -+ __ get_method_counters(xmethod, t1, done); -+ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); -+ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); -+ __ bind(done); +} + +void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { @@ -50699,9 +45862,18 @@ index 00000000000..6537b2dbd94 + __ sd(ProfileInterpreter ? t0 : zr, Address(sp, 6 * wordSize)); + + // Get mirror and store it in the frame as GC root for this Method* -+ __ load_mirror(t2, xmethod); -+ __ sd(zr, Address(sp, 5 * wordSize)); -+ __ sd(t2, Address(sp, 4 * wordSize)); ++#if INCLUDE_SHENANDOAHGC ++ if (UseShenandoahGC) { ++ __ load_mirror(x28, xmethod); ++ __ sd(zr, Address(sp, 5 * wordSize)); ++ __ sd(x28, Address(sp, 4 * wordSize)); ++ } else ++#endif ++ { ++ __ load_mirror(t2, xmethod); ++ __ sd(zr, Address(sp, 5 * wordSize)); ++ __ sd(t2, Address(sp, 4 * wordSize)); ++ } + + __ ld(xcpool, Address(xmethod, Method::const_offset())); + __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); @@ -50770,7 +45942,7 @@ index 00000000000..6537b2dbd94 + + address entry = __ pc(); + -+ const int referent_offset = java_lang_ref_Reference::referent_offset(); ++ const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "referent offset not initialized"); + + Label slow_path; @@ -50829,42 +46001,16 @@ index 00000000000..6537b2dbd94 +} + +void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { -+ // See more discussion in stackOverflow.hpp. -+ -+ const int shadow_zone_size = checked_cast(StackOverflow::stack_shadow_zone_size()); ++ // Bang each page in the shadow zone. We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. ++ const int n_shadow_pages = JavaThread::stack_shadow_zone_size() / os::vm_page_size(); ++ const int start_page = native_call ? 
n_shadow_pages : 1; + const int page_size = os::vm_page_size(); -+ const int n_shadow_pages = shadow_zone_size / page_size; -+ -+#ifdef ASSERT -+ Label L_good_limit; -+ __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); -+ __ bnez(t0, L_good_limit); -+ __ stop("shadow zone safe limit is not initialized"); -+ __ bind(L_good_limit); -+ -+ Label L_good_watermark; -+ __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); -+ __ bnez(t0, L_good_watermark); -+ __ stop("shadow zone growth watermark is not initialized"); -+ __ bind(L_good_watermark); -+#endif -+ -+ Label L_done; -+ -+ __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); -+ __ bgtu(sp, t0, L_done); -+ -+ for (int p = 1; p <= n_shadow_pages; p++) { -+ __ bang_stack_with_offset(p * page_size); ++ for (int pages = start_page; pages <= n_shadow_pages ; pages++) { ++ __ sub(t0, sp, pages * page_size); ++ __ sd(zr, Address(t0)); + } -+ -+ // Record the new watermark, but only if the update is above the safe limit. -+ // Otherwise, the next time around the check above would pass the safe limit. -+ __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); -+ __ bleu(sp, t0, L_done); -+ __ sd(sp, Address(xthread, JavaThread::shadow_zone_growth_watermark())); -+ -+ __ bind(L_done); +} + +// Interpreter stub for calling a native method. (asm interpreter) @@ -50929,7 +46075,7 @@ index 00000000000..6537b2dbd94 + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { -+ generate_counter_incr(&invocation_counter_overflow); ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); + } + + Label continue_after_compile; @@ -51094,16 +46240,7 @@ index 00000000000..6537b2dbd94 + // check for safepoint operation in progress and/or pending suspend requests + { + Label L, Continue; -+ -+ // We need an acquire here to ensure that any subsequent load of the -+ // global SafepointSynchronize::_state flag is ordered after this load -+ // of the thread-local polling word. We don't want this poll to -+ // return false (i.e. not safepointing) and a later poll of the global -+ // SafepointSynchronize::_state spuriously to return true. -+ // -+ // This is to avoid a race when we're in a native->Java transition -+ // racing the code which wakes up from a safepoint. 
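
The bang_stack_shadow_pages() loop above touches one word in every page of the shadow zone so that a stack overflow surfaces at a predictable point rather than somewhere inside a callee's frame setup. The generated sub/sd pair corresponds roughly to this C++ (illustrative only; the demo uses a local scratch buffer as a stand-in for the stack and ignores the native-call start_page special case):

    #include <cstddef>

    // Store to sp - page * page_size for each shadow page, one touch per page.
    static void bang_shadow_pages(volatile char* sp, int n_shadow_pages, int page_size) {
      for (int page = 1; page <= n_shadow_pages; page++) {
        *(sp - (size_t)page * page_size) = 0;
      }
    }

    int main() {
      char scratch[8 * 128] = {0};            // stand-in for the stack in this demo
      bang_shadow_pages(scratch + sizeof(scratch), 8, 128);
      return 0;
    }
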
-+ __ safepoint_poll(L, true /* at_return */, true /* acquire */, false /* in_nmethod */); ++ __ safepoint_poll_acquire(L); + __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); + __ beqz(t1, Continue); + __ bind(L); @@ -51159,7 +46296,7 @@ index 00000000000..6537b2dbd94 + { + Label no_reguard; + __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); -+ __ addi(t1, zr, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ addi(t1, zr, (u1)JavaThread::stack_guard_yellow_reserved_disabled); + __ bne(t0, t1, no_reguard); + + __ pusha(); // only save smashed registers @@ -51350,8 +46487,15 @@ index 00000000000..6537b2dbd94 + + // increment invocation count & check for overflow + Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; + if (inc_counter) { -+ generate_counter_incr(&invocation_counter_overflow); ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } + } + + Label continue_after_compile; @@ -51388,6 +46532,15 @@ index 00000000000..6537b2dbd94 + + // invocation counter overflow + if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ // don't think we need this ++ __ get_method(x11); ++ __ j(profile_method_continue); ++ } + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(continue_after_compile); @@ -51728,10 +46881,10 @@ index 00000000000..6537b2dbd94 +#endif // !PRODUCT diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp new file mode 100644 -index 00000000000..d2a301c6e74 +index 0000000000..84b1afc7dc --- /dev/null +++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -0,0 +1,3951 @@ +@@ -0,0 +1,4006 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
@@ -51762,7 +46915,6 @@ index 00000000000..d2a301c6e74 +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.hpp" -+#include "gc/shared/tlab_globals.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" @@ -51778,10 +46930,15 @@ index 00000000000..d2a301c6e74 +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" -+#include "utilities/powerOfTwo.hpp" + +#define __ _masm-> + ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No RISC-V specific initialization ++} ++ +// Address computation: local variables + +static inline Address iaddress(int n) { @@ -52139,7 +47296,6 @@ index 00000000000..d2a301c6e74 + int32_t offset = 0; + __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); + __ ld(tmp, Address(rarg, offset)); -+ __ resolve_oop_handle(tmp); + __ bne(result, tmp, notNull); + __ mv(result, zr); // NULL object reference + __ bind(notNull); @@ -53475,6 +48631,7 @@ index 00000000000..d2a301c6e74 + assert(UseLoopCounter || !UseOnStackReplacement, + "on-stack-replacement requires loop counters"); + Label backedge_counter_overflow; ++ Label profile_method; + Label dispatch; + if (UseLoopCounter) { + // increment backedge counter for backward branches @@ -53499,31 +48656,75 @@ index 00000000000..d2a301c6e74 + __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory + __ bind(has_counters); + -+ Label no_mdo; -+ int increment = InvocationCounter::count_increment; -+ if (ProfileInterpreter) { -+ // Are we profiling? -+ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ __ beqz(x11, no_mdo); -+ // Increment the MDO backedge counter -+ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); -+ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); -+ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, -+ x10, t0, false, ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ __ beqz(x11, no_mdo); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ x10, t0, false, ++ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); ++ __ j(dispatch); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(t0, Address(xmethod, Method::method_counters_offset())); ++ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); ++ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, ++ x10, t1, false, + UseOnStackReplacement ? 
&backedge_counter_overflow : &dispatch); -+ __ j(dispatch); ++ } else { // not TieredCompilation ++ // increment counter ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(x10, Address(t1, be_offset)); // load backedge counter ++ __ addw(t0, x10, InvocationCounter::count_increment); // increment counter ++ __ sw(t0, Address(t1, be_offset)); // store counter ++ ++ __ lwu(x10, Address(t1, inv_offset)); // load invocation counter ++ __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits ++ __ addw(x10, x10, t0); // add both counters ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); ++ __ blt(x10, t0, dispatch); ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(x10, profile_method); ++ ++ if (UseOnStackReplacement) { ++ // check for overflow against x11 which is the MDO taken count ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); ++ __ bltu(x11, t0, dispatch); // Intel == Assembler::below, lo:unsigned lower ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the MethodData*, which value does not get reset on ++ // the call to frequency_counter_overflow(). To avoid ++ // excessive calls to the overflow routine while the method is ++ // being compiled, add a second test to make sure the overflow ++ // function is called only once every overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(x11, x11, overflow_frequency - 1); ++ __ beqz(x11, backedge_counter_overflow); ++ ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against x10, which is the sum of the ++ // counters ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); ++ __ bgeu(x10, t0, backedge_counter_overflow); // Intel == Assembler::aboveEqual ++ } ++ } + } -+ __ bind(no_mdo); -+ // Increment backedge counter in MethodCounters* -+ __ ld(t0, Address(xmethod, Method::method_counters_offset())); -+ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); -+ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, -+ x10, t1, false, -+ UseOnStackReplacement ? 
&backedge_counter_overflow : &dispatch); + __ bind(dispatch); + } -+ + // Pre-load the next target bytecode into t0 + __ load_unsigned_byte(t0, Address(xbcp, 0)); + @@ -53532,52 +48733,63 @@ index 00000000000..d2a301c6e74 + // xbcp: target bcp + __ dispatch_only(vtos, /*generate_poll*/true); + -+ if (UseLoopCounter && UseOnStackReplacement) { -+ // invocation counter overflow -+ __ bind(backedge_counter_overflow); -+ __ neg(x12, x12); -+ __ add(x12, x12, xbcp); // branch xbcp -+ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::frequency_counter_overflow), -+ x12); -+ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode -+ -+ // x10: osr nmethod (osr ok) or NULL (osr not possible) -+ // w11: target bytecode -+ // x12: temporary -+ __ beqz(x10, dispatch); // test result -- no osr if null -+ // nmethod may have been invalidated (VM may block upon call_VM return) -+ __ lbu(x12, Address(x10, nmethod::state_offset())); -+ if (nmethod::in_use != 0) { -+ __ sub(x12, x12, nmethod::in_use); ++ if (UseLoopCounter) { ++ if (ProfileInterpreter && !TieredCompilation) { ++ // Out-of-line code to allocate method data oop. ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode ++ __ set_method_data_pointer_for_bcp(); ++ __ j(dispatch); + } -+ __ bnez(x12, dispatch); + -+ // We have the address of an on stack replacement routine in x10 -+ // We need to prepare to execute the OSR method. First we must -+ // migrate the locals and monitors off of the stack. ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ neg(x12, x12); ++ __ add(x12, x12, xbcp); // branch xbcp ++ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), ++ x12); ++ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode ++ ++ // x10: osr nmethod (osr ok) or NULL (osr not possible) ++ // w11: target bytecode ++ // x12: temporary ++ __ beqz(x10, dispatch); // test result -- no osr if null ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lbu(x12, Address(x10, nmethod::state_offset())); ++ if (nmethod::in_use != 0) { ++ __ sub(x12, x12, nmethod::in_use); ++ } ++ __ bnez(x12, dispatch); ++ ++ // We have the address of an on stack replacement routine in x10 ++ // We need to prepare to execute the OSR method. First we must ++ // migrate the locals and monitors off of the stack. 
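
In the non-tiered profiling path above, the backedge count kept in the MDO is never reset, so calls into frequency_counter_overflow() are throttled by masking the count with overflow_frequency - 1: only every 1024th overflowing backedge reaches the runtime. The masking in isolation, as a sketch with the constant taken from the comment above:

    #include <cstdint>
    #include <cstdio>

    // Power-of-two throttle: fire only when the low bits of the running count are
    // all zero, i.e. once every overflow_frequency backedges.
    static bool should_call_overflow(uint32_t mdo_taken_count) {
      const uint32_t overflow_frequency = 1024;
      return (mdo_taken_count & (overflow_frequency - 1)) == 0;
    }

    int main() {
      printf("%d %d %d\n", should_call_overflow(1024),
             should_call_overflow(1025), should_call_overflow(2048)); // 1 0 1
      return 0;
    }
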
+ -+ __ mv(x9, x10); // save the nmethod ++ __ mv(x9, x10); // save the nmethod + -+ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + -+ // x10 is OSR buffer, move it to expected parameter location -+ __ mv(j_rarg0, x10); ++ // x10 is OSR buffer, move it to expected parameter location ++ __ mv(j_rarg0, x10); + -+ // remove activation -+ // get sender esp -+ __ ld(esp, -+ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); -+ // remove frame anchor -+ __ leave(); -+ // Ensure compiled code always sees stack at proper alignment -+ __ andi(sp, esp, -16); ++ // remove activation ++ // get sender esp ++ __ ld(esp, ++ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ // remove frame anchor ++ __ leave(); ++ // Ensure compiled code always sees stack at proper alignment ++ __ andi(sp, esp, -16); + -+ // and begin the OSR nmethod -+ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); -+ __ jr(t0); ++ // and begin the OSR nmethod ++ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); ++ __ jr(t0); ++ } + } +} + @@ -53981,7 +49193,7 @@ index 00000000000..d2a301c6e74 + const Register temp = x9; + assert_different_registers(Rcache, index, temp); + -+ Label resolved, clinit_barrier_slow; ++ Label resolved; + + Bytecodes::Code code = bytecode(); + switch (code) { @@ -53995,10 +49207,6 @@ index 00000000000..d2a301c6e74 + __ mv(t0, (int) code); + __ beq(temp, t0, resolved); + -+ // resolve first time through -+ // Class initialization barrier slow path lands here as well. -+ __ bind(clinit_barrier_slow); -+ + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); + __ mv(temp, (int) code); + __ call_VM(noreg, entry, temp); @@ -54008,13 +49216,6 @@ index 00000000000..d2a301c6e74 + // n.b. unlike x86 Rcache is now rcpool plus the indexed offset + // so all clients ofthis method must be modified accordingly + __ bind(resolved); -+ -+ // Class initialization barrier for static methods -+ if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { -+ __ load_resolved_method_at_index(byte_no, temp, Rcache); -+ __ load_method_holder(temp, temp); -+ __ clinit_barrier(temp, t0, NULL, &clinit_barrier_slow); -+ } +} + +// The Rcache and index registers must be set before call @@ -54921,6 +50122,7 @@ index 00000000000..d2a301c6e74 + // since the parameter_size includes it. + __ push_reg(x9); + __ mv(x9, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); + __ load_resolved_reference_at_index(index, x9); + __ pop_reg(x9); + __ push_reg(index); // push appendix (MethodType, CallSite, etc.) @@ -55104,7 +50306,9 @@ index 00000000000..d2a301c6e74 + __ profile_virtual_call(x13, x30, x9); + + // Get declaring interface class from method, and itable index -+ __ load_method_holder(x10, xmethod); ++ __ ld(x10, Address(xmethod, Method::const_offset())); ++ __ ld(x10, Address(x10, ConstMethod::constants_offset())); ++ __ ld(x10, Address(x10, ConstantPool::pool_holder_offset_in_bytes())); + __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); + __ subw(xmethod, xmethod, Method::itable_index_max); + __ negw(xmethod, xmethod); @@ -55291,9 +50495,13 @@ index 00000000000..d2a301c6e74 + __ bnez(x13, loop); + } + -+ // initialize object hader only. ++ // initialize object header only. 
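
The header initialization below chooses the new object's mark word: with UseBiasedLocking it comes from the klass' prototype header (which may encode a biasable pattern), otherwise from the fixed unlocked prototype. A stripped-down C++ rendering, with types and values invented for illustration:

    #include <cstdint>
    #include <cstdio>

    struct DemoKlass { uintptr_t prototype_header; };  // stand-in for Klass

    static uintptr_t initial_mark(const DemoKlass* k, bool use_biased_locking,
                                  uintptr_t unlocked_prototype) {
      return use_biased_locking ? k->prototype_header : unlocked_prototype;
    }

    int main() {
      DemoKlass k = { 0x405 };   // hypothetical biasable bit pattern
      printf("%#lx\n", (unsigned long)initial_mark(&k, true, 0x1));   // 0x405
      printf("%#lx\n", (unsigned long)initial_mark(&k, false, 0x1));  // 0x1
      return 0;
    }
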
+ __ bind(initialize_header); -+ __ mv(t0, (intptr_t)markWord::prototype().value()); ++ if (UseBiasedLocking) { ++ __ ld(t0, Address(x14, Klass::prototype_header_offset())); ++ } else { ++ __ mv(t0, (intptr_t)markOopDesc::prototype()); ++ } + __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); + __ store_klass_gap(x10, zr); // zero klass gap for compressed oops + __ store_klass(x10, x14); // store klass last @@ -55302,7 +50510,7 @@ index 00000000000..d2a301c6e74 + SkipIfEqual skip(_masm, &DTraceAllocProbes, false); + // Trigger dtrace event for fastpath + __ push(atos); // save the return value -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), x10); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), x10); + __ pop(atos); // restore the return value + } + __ j(done); @@ -55685,7 +50893,7 @@ index 00000000000..d2a301c6e74 +} diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.hpp b/src/hotspot/cpu/riscv/templateTable_riscv.hpp new file mode 100644 -index 00000000000..fcc86108d28 +index 0000000000..fcc86108d2 --- /dev/null +++ b/src/hotspot/cpu/riscv/templateTable_riscv.hpp @@ -0,0 +1,42 @@ @@ -55731,14 +50939,14 @@ index 00000000000..fcc86108d28 +static void index_check(Register array, Register index); + +#endif // CPU_RISCV_TEMPLATETABLE_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp +diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp new file mode 100644 -index 00000000000..4f50adb05c3 +index 0000000000..6c89133de0 --- /dev/null -+++ b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp -@@ -0,0 +1,33 @@ ++++ b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp +@@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55762,22 +50970,31 @@ index 00000000000..4f50adb05c3 + * + */ + -+#include "precompiled.hpp" -+#include "prims/universalNativeInvoker.hpp" -+#include "utilities/debug.hpp" ++#ifndef CPU_RISCV_VMSTRUCTS_RISCV_HPP ++#define CPU_RISCV_VMSTRUCTS_RISCV_HPP + -+address ProgrammableInvoker::generate_adapter(jobject jabi, jobject jlayout) { -+ Unimplemented(); -+ return nullptr; -+} -diff --git a/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
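
VM_STRUCTS_CPU below follows the usual HotSpot X-macro pattern: this header only lists the platform-specific fields, and the shared vmStructs.cpp passes in the declarer macros that turn each entry into a Serviceability Agent table row. A toy version of the pattern, with made-up macro names:

    #include <cstdio>

    // The platform side: a list of fields, parameterized by a declarer macro.
    #define MY_STRUCTS_CPU(nonstatic_field) \
      nonstatic_field(JavaFrameAnchor, _last_Java_fp, "intptr_t*")

    // One possible declarer: print a row instead of building the real SA table.
    #define PRINT_FIELD(klass, field, type) \
      printf("%s::%s : %s\n", #klass, #field, type);

    int main() {
      MY_STRUCTS_CPU(PRINT_FIELD)
      return 0;
    }
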
++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // CPU_RISCV_VMSTRUCTS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp new file mode 100644 -index 00000000000..ce70da72f2e +index 0000000000..6bdce51506 --- /dev/null -+++ b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp -@@ -0,0 +1,42 @@ ++++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp +@@ -0,0 +1,87 @@ +/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55802,31 +51019,76 @@ index 00000000000..ce70da72f2e + */ + +#include "precompiled.hpp" -+#include "prims/universalUpcallHandler.hpp" -+#include "utilities/debug.hpp" ++#include "memory/allocation.hpp" ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_riscv.hpp" + -+address ProgrammableUpcallHandler::generate_upcall_stub(jobject jrec, jobject jabi, jobject jlayout) { -+ Unimplemented(); -+ return nullptr; ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); ++ _initialized = true; +} + -+address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) { -+ ShouldNotCallThis(); -+ return nullptr; ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; +} + -+bool ProgrammableUpcallHandler::supports_optimized_upcalls() { -+ return false; ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; +} -diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = 
NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp new file mode 100644 -index 00000000000..6c89133de02 +index 0000000000..711e4aeaf6 --- /dev/null -+++ b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp -@@ -0,0 +1,42 @@ ++++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp +@@ -0,0 +1,55 @@ +/* -+ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -55849,29 +51111,42 @@ index 00000000000..6c89133de02 + * + */ + -+#ifndef CPU_RISCV_VMSTRUCTS_RISCV_HPP -+#define CPU_RISCV_VMSTRUCTS_RISCV_HPP ++#ifndef CPU_RISCV_VM_VERSION_EXT_RISCV_HPP ++#define CPU_RISCV_VM_VERSION_EXT_RISCV_HPP + -+// These are the CPU-specific fields, types and integer -+// constants required by the Serviceability Agent. This file is -+// referenced by vmStructs.cpp. ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" + -+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ -+ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; + -+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; + -+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); + -+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); + -+#endif // CPU_RISCV_VMSTRUCTS_RISCV_HPP ++}; ++ ++#endif // CPU_RISCV_VM_VERSION_EXT_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp new file mode 100644 -index 00000000000..768c7633ca6 +index 0000000000..0e8f526bd9 --- /dev/null +++ 
b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -0,0 +1,230 @@ +@@ -0,0 +1,209 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. @@ -55907,11 +51182,19 @@ index 00000000000..768c7633ca6 +#include OS_HEADER_INLINE(os) + +const char* VM_Version::_uarch = ""; ++const char* VM_Version::_vm_mode = ""; +uint32_t VM_Version::_initial_vector_length = 0; + +void VM_Version::initialize() { + get_os_cpu_info(); + ++ // check if satp.mode is supported, currently supports up to SV48(RV64) ++ if (get_satp_mode() > VM_SV48) { ++ vm_exit_during_initialization( ++ err_msg("Unsupported satp mode: %s. Only satp modes up to sv48 are supported for now.", ++ _vm_mode)); ++ } ++ + if (FLAG_IS_DEFAULT(UseFMA)) { + FLAG_SET_DEFAULT(UseFMA, true); + } @@ -55956,11 +51239,6 @@ index 00000000000..768c7633ca6 + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + -+ if (UseSHA3Intrinsics) { -+ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); -+ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); -+ } -+ + if (UseCRC32Intrinsics) { + warning("CRC32 intrinsics are not available on this CPU."); + FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); @@ -55971,11 +51249,6 @@ index 00000000000..768c7633ca6 + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + -+ if (UseMD5Intrinsics) { -+ warning("MD5 intrinsics are not available on this CPU."); -+ FLAG_SET_DEFAULT(UseMD5Intrinsics, false); -+ } -+ + if (UseRVV) { + if (!(_features & CPU_V)) { + warning("RVV is not supported on this CPU"); @@ -55986,11 +51259,6 @@ index 00000000000..768c7633ca6 + } + } + -+ if (UseRVB && !(_features & CPU_B)) { -+ warning("RVB is not supported on this CPU"); -+ FLAG_SET_DEFAULT(UseRVB, false); -+ } -+ + if (UseRVC && !(_features & CPU_C)) { + warning("RVC is not supported on this CPU"); + FLAG_SET_DEFAULT(UseRVC, false); @@ -56000,7 +51268,7 @@ index 00000000000..768c7633ca6 + FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); + } + -+ if (UseRVB) { ++ if (UseZbb) { + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { + FLAG_SET_DEFAULT(UsePopCountInstruction, true); + } @@ -56021,6 +51289,10 @@ index 00000000000..768c7633ca6 +#ifdef COMPILER2 + c2_initialize(); +#endif // COMPILER2 ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++ ++ FLAG_SET_DEFAULT(UseMembar, true); +} + +#ifdef COMPILER2 @@ -56041,10 +51313,6 @@ index 00000000000..768c7633ca6 + FLAG_SET_DEFAULT(MaxVectorSize, 0); + } + -+ if (!UseRVV) { -+ FLAG_SET_DEFAULT(UseRVVForBigIntegerShiftIntrinsics, false); -+ } -+ + if (UseRVV) { + if (FLAG_IS_DEFAULT(MaxVectorSize)) { + MaxVectorSize = _initial_vector_length; @@ -56088,26 +51356,12 @@ index 00000000000..768c7633ca6 + } +} +#endif // COMPILER2 -+ -+void VM_Version::initialize_cpu_information(void) { -+ // do nothing if cpu info has been initialized -+ if (_initialized) { -+ return; -+ } -+ -+ _no_of_cores = os::processor_count(); -+ _no_of_threads = _no_of_cores; -+ _no_of_sockets = _no_of_cores; -+ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); -+ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); -+ _initialized = true; -+} diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp new file mode 100644 -index 00000000000..8e35530359a +index 0000000000..875511f522 --- /dev/null +++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp -@@ -0,0 +1,72 @@ +@@ -0,0 +1,80 @@ +/* + * Copyright 
(c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. @@ -56148,18 +51402,27 @@ index 00000000000..8e35530359a + static void c2_initialize(); +#endif // COMPILER2 + ++// VM modes (satp.mode) privileged ISA 1.10 ++enum VM_MODE { ++ VM_MBARE = 0, ++ VM_SV39 = 8, ++ VM_SV48 = 9, ++ VM_SV57 = 10, ++ VM_SV64 = 11 ++}; ++ +protected: + static const char* _uarch; ++ static const char* _vm_mode; + static uint32_t _initial_vector_length; + static void get_os_cpu_info(); + static uint32_t get_current_vector_length(); ++ static VM_MODE get_satp_mode(); + +public: + // Initialization + static void initialize(); + -+ constexpr static bool supports_stack_watermark_barrier() { return true; } -+ + enum Feature_Flag { +#define CPU_FEATURE_FLAGS(decl) \ + decl(I, "i", 8) \ @@ -56168,8 +51431,7 @@ index 00000000000..8e35530359a + decl(F, "f", 5) \ + decl(D, "d", 3) \ + decl(C, "c", 2) \ -+ decl(V, "v", 21) \ -+ decl(B, "b", 1) ++ decl(V, "v", 21) + +#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit), + CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) @@ -56182,10 +51444,10 @@ index 00000000000..8e35530359a +#endif // CPU_RISCV_VM_VERSION_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp new file mode 100644 -index 00000000000..aa7222dc64a +index 0000000000..c4338715f9 --- /dev/null +++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -@@ -0,0 +1,64 @@ +@@ -0,0 +1,51 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. @@ -56228,34 +51490,21 @@ index 00000000000..aa7222dc64a + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { + for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { -+ regName[i++] = reg->name(); ++ regName[i++] = freg->name(); + } + freg = freg->successor(); + } + -+ VectorRegister vreg = ::as_VectorRegister(0); -+ for ( ; i < ConcreteRegisterImpl::max_vpr ; ) { -+ for (int j = 0 ; j < VectorRegisterImpl::max_slots_per_register ; j++) { -+ regName[i++] = reg->name(); -+ } -+ vreg = vreg->successor(); -+ } -+ + for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) { -+ regName[i] = "NON-GPR-FPR-VPR"; ++ regName[i] = "NON-GPR-FPR"; + } +} -+ -+VMReg VMRegImpl::vmStorageToVMReg(int type, int index) { -+ Unimplemented(); -+ return VMRegImpl::Bad(); -+} diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp new file mode 100644 -index 00000000000..9e611b1f671 +index 0000000000..6f613a8f11 --- /dev/null +++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp -@@ -0,0 +1,68 @@ +@@ -0,0 +1,53 @@ +/* + * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
@@ -56292,10 +51541,6 @@ index 00000000000..9e611b1f671 + return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; +} + -+inline bool is_VectorRegister() { -+ return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr; -+} -+ +inline Register as_Register() { + assert(is_Register(), "must be"); + return ::as_Register(value() / RegisterImpl::max_slots_per_register); @@ -56307,26 +51552,15 @@ index 00000000000..9e611b1f671 + FloatRegisterImpl::max_slots_per_register); +} + -+inline VectorRegister as_VectorRegister() { -+ assert(is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be"); -+ return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / -+ VectorRegisterImpl::max_slots_per_register); -+} -+ +inline bool is_concrete() { + assert(is_reg(), "must be"); -+ if (is_VectorRegister()) { -+ int base = value() - ConcreteRegisterImpl::max_fpr; -+ return (base % VectorRegisterImpl::max_slots_per_register) == 0; -+ } else { -+ return is_even(value()); -+ } ++ return is_even(value()); +} + +#endif // CPU_RISCV_VMREG_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp new file mode 100644 -index 00000000000..06b70020b4b +index 0000000000..06b70020b4 --- /dev/null +++ b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp @@ -0,0 +1,46 @@ @@ -56378,7 +51612,7 @@ index 00000000000..06b70020b4b +#endif // CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp new file mode 100644 -index 00000000000..78b81138003 +index 0000000000..0d205240a5 --- /dev/null +++ b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp @@ -0,0 +1,260 @@ @@ -56555,7 +51789,7 @@ index 00000000000..78b81138003 + assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); + + // Entry arguments: -+ // t2: CompiledICHolder ++ // t1: CompiledICHolder + // j_rarg0: Receiver + + // This stub is called from compiled code which has no callee-saved registers, @@ -56642,59 +51876,11 @@ index 00000000000..78b81138003 + const unsigned int icache_line_size = wordSize; + return icache_line_size; +} -diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -index 897be2209e2..ee298f56653 100644 ---- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -@@ -1,6 +1,6 @@ - /* -- * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2016, 2019, SAP SE. All rights reserved. -+ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2016, 2019 SAP SE. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -1447,7 +1447,10 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op - } - - // result = condition ? 
opr1 : opr2 --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on s390"); -+ - Assembler::branch_condition acond = Assembler::bcondEqual, ncond = Assembler::bcondNotEqual; - switch (condition) { - case lir_cond_equal: acond = Assembler::bcondEqual; ncond = Assembler::bcondNotEqual; break; -diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -index cee3140f4f7..82e9de5a06f 100644 ---- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -1970,7 +1970,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - } - } - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on x86"); -+ - Assembler::Condition acond, ncond; - switch (condition) { - case lir_cond_equal: acond = Assembler::equal; ncond = Assembler::notEqual; break; diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp -index 3799adf5dd9..6f75e623a9a 100644 +index 2842a11f92..208a374eea 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp -@@ -2845,6 +2845,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) { +@@ -2829,6 +2829,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) { strncpy(cpuinfo, "IA64", length); #elif defined(PPC) strncpy(cpuinfo, "PPC64", length); @@ -56703,9 +51889,19 @@ index 3799adf5dd9..6f75e623a9a 100644 #elif defined(S390) strncpy(cpuinfo, "S390", length); #elif defined(SPARC) +@@ -4060,7 +4062,8 @@ size_t os::Linux::find_large_page_size() { + IA64_ONLY(256 * M) + PPC_ONLY(4 * M) + S390_ONLY(1 * M) +- SPARC_ONLY(4 * M); ++ SPARC_ONLY(4 * M) ++ RISCV64_ONLY(2 * M); + #endif // ZERO + + FILE *fp = fopen("/proc/meminfo", "r"); diff --git a/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp new file mode 100644 -index 00000000000..f2610af6cdd +index 0000000000..f2610af6cd --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp @@ -0,0 +1,26 @@ @@ -56737,10 +51933,10 @@ index 00000000000..f2610af6cdd +// nothing required here diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp new file mode 100644 -index 00000000000..761da5d743e +index 0000000000..4a1ebee8b0 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -@@ -0,0 +1,134 @@ +@@ -0,0 +1,189 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. @@ -56776,26 +51972,96 @@ index 00000000000..761da5d743e +// Note that memory_order_conservative requires a full barrier after atomic stores. +// See https://patchwork.kernel.org/patch/3575821/ + ++#if defined(__clang_major__) ++#define FULL_COMPILER_ATOMIC_SUPPORT ++#elif (__GNUC__ > 13) || ((__GNUC__ == 13) && (__GNUC_MINOR__ >= 2)) ++#define FULL_COMPILER_ATOMIC_SUPPORT ++#endif ++ ++#define FULL_MEM_BARRIER __sync_synchronize() ++#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); ++#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); ++ +template -+struct Atomic::PlatformAdd { -+ template -+ D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { ++struct Atomic::PlatformAdd ++ : Atomic::FetchAndAdd > ++{ ++ template ++ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++ // If we add add and fetch for sub word and are using older compiler ++ // it must be added here due to not using lib atomic. ++ STATIC_ASSERT(byte_size >= 4); ++#endif ++ + D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE); + FULL_MEM_BARRIER; + return res; + } + -+ template -+ D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const { -+ return add_and_fetch(dest, add_value, order) - add_value; ++ template ++ D fetch_and_add(I add_value, D volatile* dest, atomic_memory_order order) const { ++ return add_and_fetch(add_value, dest, order) - add_value; + } +}; + ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++template<> ++template ++inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, ++ T volatile* dest __attribute__((unused)), ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(1 == sizeof(T)); ++ ++ if (order != memory_order_relaxed) { ++ FULL_MEM_BARRIER; ++ } ++ ++ uint32_t volatile* aligned_dst = (uint32_t volatile*)(((uintptr_t)dest) & (~((uintptr_t)0x3))); ++ int shift = 8 * (((uintptr_t)dest) - ((uintptr_t)aligned_dst)); // 0, 8, 16, 24 ++ ++ uint64_t mask = 0xfful << shift; // 0x00000000..FF.. ++ uint64_t remask = ~mask; // 0xFFFFFFFF..00.. ++ ++ uint64_t w_cv = ((uint64_t)(unsigned char)compare_value) << shift; // widen to 64-bit 0x00000000..CC.. ++ uint64_t w_ev = ((uint64_t)(unsigned char)exchange_value) << shift; // widen to 64-bit 0x00000000..EE.. ++ ++ uint64_t old_value; ++ uint64_t rc_temp; ++ ++ __asm__ __volatile__ ( ++ "1: lr.w %0, %2 \n\t" ++ " and %1, %0, %5 \n\t" // ignore unrelated bytes and widen to 64-bit 0x00000000..XX.. ++ " bne %1, %3, 2f \n\t" // compare 64-bit w_cv ++ " and %1, %0, %6 \n\t" // remove old byte ++ " or %1, %1, %4 \n\t" // add new byte ++ " sc.w %1, %1, %2 \n\t" // store new word ++ " bnez %1, 1b \n\t" ++ "2: \n\t" ++ : /*%0*/"=&r" (old_value), /*%1*/"=&r" (rc_temp), /*%2*/"+A" (*aligned_dst) ++ : /*%3*/"r" (w_cv), /*%4*/"r" (w_ev), /*%5*/"r" (mask), /*%6*/"r" (remask) ++ : "memory" ); ++ ++ if (order != memory_order_relaxed) { ++ FULL_MEM_BARRIER; ++ } ++ ++ return (T)((old_value & mask) >> shift); ++} ++#endif ++ +template +template -+inline T Atomic::PlatformXchg::operator()(T volatile* dest, -+ T exchange_value, ++inline T Atomic::PlatformXchg::operator()(T exchange_value, ++ T volatile* dest, + atomic_memory_order order) const { ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++ // If we add xchg for sub word and are using older compiler ++ // it must be added here due to not using lib atomic. 
++ STATIC_ASSERT(byte_size >= 4); ++#endif ++ + STATIC_ASSERT(byte_size == sizeof(T)); + T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE); + FULL_MEM_BARRIER; @@ -56805,10 +52071,15 @@ index 00000000000..761da5d743e +// __attribute__((unused)) on dest is to get rid of spurious GCC warnings. +template +template -+inline T Atomic::PlatformCmpxchg::operator()(T volatile* dest __attribute__((unused)), ++inline T Atomic::PlatformCmpxchg::operator()(T exchange_value, ++ T volatile* dest __attribute__((unused)), + T compare_value, -+ T exchange_value, + atomic_memory_order order) const { ++ ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++ STATIC_ASSERT(byte_size >= 4); ++#endif ++ + STATIC_ASSERT(byte_size == sizeof(T)); + T value = compare_value; + if (order != memory_order_relaxed) { @@ -56826,9 +52097,9 @@ index 00000000000..761da5d743e + +template<> +template -+inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__((unused)), ++inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, ++ T volatile* dest __attribute__((unused)), + T compare_value, -+ T exchange_value, + atomic_memory_order order) const { + STATIC_ASSERT(4 == sizeof(T)); + if (order != memory_order_relaxed) { @@ -56853,31 +52124,11 @@ index 00000000000..761da5d743e + return rv; +} + -+template -+struct Atomic::PlatformOrderedLoad -+{ -+ template -+ T operator()(const volatile T* p) const { T data; __atomic_load(const_cast(p), &data, __ATOMIC_ACQUIRE); return data; } -+}; -+ -+template -+struct Atomic::PlatformOrderedStore -+{ -+ template -+ void operator()(volatile T* p, T v) const { __atomic_store(const_cast(p), &v, __ATOMIC_RELEASE); } -+}; -+ -+template -+struct Atomic::PlatformOrderedStore -+{ -+ template -+ void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); } -+}; -+ ++#undef FULL_COMPILER_ATOMIC_SUPPORT +#endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp new file mode 100644 -index 00000000000..28868c76406 +index 0000000000..28868c7640 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp @@ -0,0 +1,45 @@ @@ -56926,12 +52177,12 @@ index 00000000000..28868c76406 +} + +#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp +diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp new file mode 100644 -index 00000000000..147cfdf3c10 +index 0000000000..bdf36d6b4c --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp -@@ -0,0 +1,31 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp +@@ -0,0 +1,124 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
@@ -56957,63 +52208,108 @@ index 00000000000..147cfdf3c10 + * + */ + -+#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP ++#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP ++#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP + -+// Empty for build system ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} + -+#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp -new file mode 100644 -index 00000000000..1aa58f27871 ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp -@@ -0,0 +1,42 @@ -+/* -+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; // fall through ++ case 7: to[6] = from[6]; // fall through ++ case 6: to[5] = from[5]; // fall through ++ case 5: to[4] = from[4]; // fall through ++ case 4: to[3] = from[3]; // fall through ++ case 3: to[2] = from[2]; // fall through ++ case 2: to[1] = from[1]; // fall through ++ case 1: to[0] = from[0]; // fall through ++ case 0: break; ++ default: ++ memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} + -+#ifndef OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} + -+#include ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} + -+// -+// Support for building on older Linux systems -+// ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} + -+#ifndef SYS_memfd_create -+#define SYS_memfd_create 279 -+#endif -+#ifndef SYS_fallocate -+#define SYS_fallocate 47 -+#endif ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ _Copy_conjoint_jshorts_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ _Copy_conjoint_jints_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ _Copy_conjoint_jlongs_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); ++ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_bytes(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jshorts(from, to, count); ++} + -+#endif // OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jints(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} ++ ++#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP diff --git 
a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp new file mode 100644 -index 00000000000..297414bfcd5 +index 0000000000..297414bfcd --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp @@ -0,0 +1,43 @@ @@ -57062,10 +52358,10 @@ index 00000000000..297414bfcd5 +#endif // OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp new file mode 100644 -index 00000000000..1c33dc1e87f +index 0000000000..5b5d35553f --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -@@ -0,0 +1,63 @@ +@@ -0,0 +1,74 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. @@ -57105,10 +52401,6 @@ index 00000000000..1c33dc1e87f +inline void OrderAccess::loadstore() { acquire(); } +inline void OrderAccess::storeload() { fence(); } + -+#define FULL_MEM_BARRIER __sync_synchronize() -+#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); -+#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); -+ +inline void OrderAccess::acquire() { + READ_MEM_BARRIER; +} @@ -57121,20 +52413,35 @@ index 00000000000..1c33dc1e87f + FULL_MEM_BARRIER; +} + -+inline void OrderAccess::cross_modify_fence_impl() { -+ asm volatile("fence.i" : : : "memory"); -+ if (UseConservativeFence) { -+ asm volatile("fence ir, ir" : : : "memory"); -+ } -+} ++ ++template ++struct OrderAccess::PlatformOrderedLoad ++{ ++ template ++ T operator()(const volatile T* p) const { T data; __atomic_load(const_cast(p), &data, __ATOMIC_ACQUIRE); return data; } ++}; ++ ++template ++struct OrderAccess::PlatformOrderedStore ++{ ++ template ++ void operator()(T v, volatile T* p) const { __atomic_store(const_cast(p), &v, __ATOMIC_RELEASE); } ++}; ++ ++template ++struct OrderAccess::PlatformOrderedStore ++{ ++ template ++ void operator()(T v, volatile T* p) const { release_store(p, v); OrderAccess::fence(); } ++}; + +#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp new file mode 100644 -index 00000000000..1f46bbab0a2 +index 0000000000..8b772892b4 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -@@ -0,0 +1,466 @@ +@@ -0,0 +1,624 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
@@ -57174,6 +52481,7 @@ index 00000000000..1f46bbab0a2 +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/java.hpp" @@ -57185,7 +52493,6 @@ index 00000000000..1f46bbab0a2 +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/timer.hpp" -+#include "signals_posix.hpp" +#include "utilities/debug.hpp" +#include "utilities/events.hpp" +#include "utilities/vmError.hpp" @@ -57223,11 +52530,11 @@ index 00000000000..1f46bbab0a2 + return (char*) -1; +} + -+address os::Posix::ucontext_get_pc(const ucontext_t * uc) { ++address os::Linux::ucontext_get_pc(const ucontext_t * uc) { + return (address)uc->uc_mcontext.__gregs[REG_PC]; +} + -+void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) { ++void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { + uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc; +} + @@ -57239,13 +52546,29 @@ index 00000000000..1f46bbab0a2 + return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; +} + -+address os::fetch_frame_from_context(const void* ucVoid, -+ intptr_t** ret_sp, intptr_t** ret_fp) { -+ address epc; ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). ++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; + const ucontext_t* uc = (const ucontext_t*)ucVoid; + + if (uc != NULL) { -+ epc = os::Posix::ucontext_get_pc(uc); ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); + if (ret_sp != NULL) { + *ret_sp = os::Linux::ucontext_get_sp(uc); + } @@ -57253,7 +52576,8 @@ index 00000000000..1f46bbab0a2 + *ret_fp = os::Linux::ucontext_get_fp(uc); + } + } else { -+ epc = NULL; ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); + if (ret_sp != NULL) { + *ret_sp = (intptr_t *)NULL; + } @@ -57265,23 +52589,51 @@ index 00000000000..1f46bbab0a2 + return epc; +} + -+frame os::fetch_compiled_frame_from_context(const void* ucVoid) { -+ const ucontext_t* uc = (const ucontext_t*)ucVoid; -+ // In compiled code, the stack banging is performed before RA -+ // has been saved in the frame. RA is live, and SP and FP -+ // belong to the caller. 
-+ intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); -+ intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); -+ address frame_pc = (address)(uc->uc_mcontext.__gregs[REG_LR] -+ - NativeInstruction::instruction_size); -+ return frame(frame_sp, frame_fp, frame_pc); -+} -+ +frame os::fetch_frame_from_context(const void* ucVoid) { + intptr_t* frame_sp = NULL; + intptr_t* frame_fp = NULL; -+ address epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); -+ return frame(frame_sp, frame_fp, epc); ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); ++ return frame(frame_sp, frame_fp, epc.pc()); ++} ++ ++bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { ++ address pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (Interpreter::contains(pc)) { ++ // interpreter performs stack banging after the fixed frame header has ++ // been generated while the compilers perform it before. To maintain ++ // semantic consistency between interpreted and compiled frames, the ++ // method returns the Java sender of the current frame. ++ *fr = os::fetch_frame_from_context(uc); ++ if (!fr->is_first_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } else { ++ // more complex code with compiled code ++ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); ++ CodeBlob* cb = CodeCache::find_blob(pc); ++ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { ++ // Not sure where the pc points to, fallback to default ++ // stack overflow handling ++ return false; ++ } else { ++ // In compiled code, the stack banging is performed before RA ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.__gregs[REG_LR] ++ - NativeInstruction::instruction_size); ++ *fr = frame(sp, fp, pc); ++ if (!fr->is_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ assert(!fr->is_first_frame(), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } ++ } ++ assert(fr->is_java_frame(), "Safety check"); ++ return true; +} + +// By default, gcc always saves frame pointer rfp on this stack. This @@ -57309,31 +52661,138 @@ index 00000000000..1f46bbab0a2 +} + +// Utility functions -+bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, -+ ucontext_t* uc, JavaThread* thread) { ++extern "C" JNIEXPORT int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = Thread::current_or_null_safe(); ++ ++ // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away ++ // (no destructors can be run) ++ os::ThreadCrashProtection::check_crash_protection(sig, t); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. 
++ ++ if (sig == SIGPIPE || sig == SIGXFSZ) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219 ++ return true; ++ } ++ } ++ ++#ifdef CAN_SHOW_REGISTERS_ON_ASSERT ++ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { ++ if (handle_assert_poison_fault(ucVoid, info->si_addr)) { ++ return 1; ++ } ++ } ++#endif ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++ thread = (JavaThread *) t; ++ } ++ else if(t->is_VM_thread()){ ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // Handle SafeFetch faults ++ if ((sig == SIGSEGV || sig == SIGBUS) && uc != NULL) { ++ address const pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { ++ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); ++ return 1; ++ } ++ } + + // decide if this trap can be handled by a stub + address stub = NULL; + -+ address pc = NULL; ++ address pc = NULL; + + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { -+ pc = (address) os::Posix::ucontext_get_pc(uc); -+ -+ address addr = (address) info->si_addr; -+ -+ // Make sure the high order byte is sign extended, as it may be masked away by the hardware. -+ if ((uintptr_t(addr) & (uintptr_t(1) << 55)) != 0) { -+ addr = address(uintptr_t(addr) | (uintptr_t(0xFF) << 56)); -+ } ++ pc = (address) os::Linux::ucontext_get_pc(uc); + + // Handle ALL stack overflow variations here + if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++ + // check if fault address is within thread stack -+ if (thread->is_in_full_stack(addr)) { -+ if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) { -+ return true; // continue ++ if (thread->on_local_stack(addr)) { ++ // stack overflow ++ if (thread->in_stack_yellow_reserved_zone(addr)) { ++ if (thread->thread_state() == _thread_in_Java) { ++ if (thread->in_stack_reserved_zone(addr)) { ++ frame fr; ++ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { ++ assert(fr.is_java_frame(), "Must be a Java frame"); ++ frame activation = ++ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); ++ if (activation.sp() != NULL) { ++ thread->disable_stack_reserved_zone(); ++ if (activation.is_interpreted_frame()) { ++ thread->set_reserved_stack_activation((address)( ++ activation.fp() + frame::interpreter_frame_initial_sp_offset)); ++ } else { ++ thread->set_reserved_stack_activation((address)activation.unextended_sp()); ++ } ++ return 1; ++ } ++ } ++ } ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++ thread->disable_stack_yellow_reserved_zone(); ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. ++ thread->disable_stack_yellow_reserved_zone(); ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. 
++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } + } + } + } @@ -57349,7 +52808,7 @@ index 00000000000..1f46bbab0a2 + tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); + } + stub = SharedRuntime::get_handle_wrong_method_stub(); -+ } else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { + stub = SharedRuntime::get_poll_stub(pc); + } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { + // BugId 4454115: A read from a MappedByteBuffer can fault @@ -57357,34 +52816,12 @@ index 00000000000..1f46bbab0a2 + // Do not crash the VM in such a case. + CodeBlob* cb = CodeCache::find_blob_unsafe(pc); + CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; -+ bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); -+ if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { ++ if (nm != NULL && nm->has_unsafe_access()) { + address next_pc = pc + NativeCall::instruction_size; -+ if (is_unsafe_arraycopy) { -+ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); -+ } + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } -+ } else if (sig == SIGILL && nativeInstruction_at(pc)->is_stop()) { -+ // Pull a pointer to the error message out of the instruction -+ // stream. -+ const uint64_t *detail_msg_ptr -+ = (uint64_t*)(pc + NativeInstruction::instruction_size); -+ const char *detail_msg = (const char *)*detail_msg_ptr; -+ const char *msg = "stop"; -+ if (TraceTraps) { -+ tty->print_cr("trap: %s: (SIGILL)", msg); -+ } -+ -+ // End life with a fatal error, message and detail message and the context. -+ // Note: no need to do any post-processing here (e.g. 
signal chaining) -+ va_list va_dummy; -+ VMError::report_and_die(thread, uc, NULL, 0, msg, detail_msg, va_dummy); -+ va_end(va_dummy); -+ -+ ShouldNotReachHere(); + } else if (sig == SIGFPE && -+ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { ++ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { + stub = + SharedRuntime:: + continuation_for_implicit_exception(thread, @@ -57392,42 +52829,70 @@ index 00000000000..1f46bbab0a2 + SharedRuntime:: + IMPLICIT_DIVIDE_BY_ZERO); + } else if (sig == SIGSEGV && -+ MacroAssembler::uses_implicit_null_check((void*)addr)) { ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { + // Determination of interpreter/vtable stub/compiled code null exception + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); + } -+ } else if ((thread->thread_state() == _thread_in_vm || -+ thread->thread_state() == _thread_in_native) && -+ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ -+ thread->doing_unsafe_access()) { ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { + address next_pc = pc + NativeCall::instruction_size; -+ if (UnsafeCopyMemory::contains_pc(pc)) { -+ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); -+ } + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } + + // jni_fast_GetField can trap at certain pc's if a GC kicks in + // and the heap gets shrunk before the field access. + if ((sig == SIGSEGV) || (sig == SIGBUS)) { -+ address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); -+ if (addr_slow != (address)-1) { -+ stub = addr_slow; ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; + } + } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. ++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++ // Block current thread until the memory serialize page permission restored. ++ os::block_on_serialize_page_trap(); ++ return true; ++ } + } + + if (stub != NULL) { + // save all thread context in case we need to restore it -+ if (thread != NULL) { -+ thread->set_saved_exception_pc(pc); -+ } ++ if (thread != NULL) thread->set_saved_exception_pc(pc); + -+ os::Posix::ucontext_set_pc(uc, stub); ++ os::Linux::ucontext_set_pc(uc, stub); + return true; + } + -+ return false; // Mute compiler ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++ ++ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler +} + +void os::Linux::init_thread_fpu_state(void) { @@ -57490,7 +52955,7 @@ index 00000000000..1f46bbab0a2 + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. 
-+ address pc = os::Posix::ucontext_get_pc(uc); ++ address pc = os::Linux::ucontext_get_pc(uc); + print_instructions(st, pc, sizeof(char)); + st->cr(); +} @@ -57603,10 +53068,10 @@ index 00000000000..1f46bbab0a2 +}; diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp new file mode 100644 -index 00000000000..6d415630661 +index 0000000000..f3e3a73bc5 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp -@@ -0,0 +1,59 @@ +@@ -0,0 +1,40 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. @@ -57646,29 +53111,10 @@ index 00000000000..6d415630661 + *(jlong *) dst = *(const jlong *) src; + } + -+ // SYSCALL_RISCV_FLUSH_ICACHE is used to flush instruction cache. The "fence.i" instruction -+ // only work on the current hart, so kernel provides the icache flush syscall to flush icache -+ // on each hart. You can pass a flag to determine a global or local icache flush. -+ static void icache_flush(long int start, long int end) -+ { -+ const int SYSCALL_RISCV_FLUSH_ICACHE = 259; -+ register long int __a7 asm ("a7") = SYSCALL_RISCV_FLUSH_ICACHE; -+ register long int __a0 asm ("a0") = start; -+ register long int __a1 asm ("a1") = end; -+ // the flush can be applied to either all threads or only the current. -+ // 0 means a global icache flush, and the icache flush will be applied -+ // to other harts concurrently executing. -+ register long int __a2 asm ("a2") = 0; -+ __asm__ volatile ("ecall\n\t" -+ : "+r" (__a0) -+ : "r" (__a0), "r" (__a1), "r" (__a2), "r" (__a7) -+ : "memory"); -+ } -+ +#endif // OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp new file mode 100644 -index 00000000000..a6432c84ec7 +index 0000000000..2bd48e09c3 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp @@ -0,0 +1,38 @@ @@ -57703,19 +53149,147 @@ index 00000000000..a6432c84ec7 +#include "runtime/prefetch.hpp" + + -+inline void Prefetch::read (const void *loc, intx interval) { ++inline void Prefetch::read (void *loc, intx interval) { +} + +inline void Prefetch::write(void *loc, intx interval) { +} + +#endif // OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.cpp b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.cpp +new file mode 100644 +index 0000000000..ffcd819487 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.cpp +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2023, Rivos Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "logging/log.hpp" ++#include "riscv_flush_icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" ++#include "utilities/debug.hpp" ++ ++#include ++#include ++ ++#define check_with_errno(check_type, cond, msg) \ ++ do { \ ++ int err = errno; \ ++ check_type(cond, "%s; error='%s' (errno=%s)", msg, os::strerror(err), \ ++ os::errno_name(err)); \ ++} while (false) ++ ++#define assert_with_errno(cond, msg) check_with_errno(assert, cond, msg) ++#define guarantee_with_errno(cond, msg) check_with_errno(guarantee, cond, msg) ++ ++#ifndef NR_riscv_flush_icache ++#ifndef NR_arch_specific_syscall ++#define NR_arch_specific_syscall 244 ++#endif ++#define NR_riscv_flush_icache (NR_arch_specific_syscall + 15) ++#endif ++ ++#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL ++#define SYS_RISCV_FLUSH_ICACHE_ALL 0UL ++ ++static long sys_flush_icache(uintptr_t start, uintptr_t end , uintptr_t flags) { ++ return syscall(NR_riscv_flush_icache, start, end, flags); ++} ++ ++bool RiscvFlushIcache::test() { ++ ATTRIBUTE_ALIGNED(64) char memory[64]; ++ long ret = sys_flush_icache((uintptr_t)&memory[0], ++ (uintptr_t)&memory[sizeof(memory) - 1], ++ SYS_RISCV_FLUSH_ICACHE_ALL); ++ if (ret == 0) { ++ return true; ++ } ++ int err = errno; \ ++ log_error(os)("Syscall: RISCV_FLUSH_ICACHE not available; error='%s' (errno=%s)", ++ os::strerror(err), os::errno_name(err)); ++ return false; ++} ++ ++void RiscvFlushIcache::flush(uintptr_t start, uintptr_t end) { ++ long ret = sys_flush_icache(start, end, SYS_RISCV_FLUSH_ICACHE_ALL); ++ guarantee_with_errno(ret == 0, "riscv_flush_icache failed"); ++} +diff --git a/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.hpp b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.hpp +new file mode 100644 +index 0000000000..f4e7263b39 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.hpp +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2023, Rivos Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_LINUX_RISCV_FLUSH_ICACHE_LINUX_HPP ++#define OS_LINUX_RISCV_FLUSH_ICACHE_LINUX_HPP ++ ++#include "memory/allocation.hpp" ++#include "runtime/vm_version.hpp" ++#include "utilities/growableArray.hpp" ++ ++class RiscvFlushIcache: public AllStatic { ++ public: ++ static bool test(); ++ static void flush(uintptr_t start, uintptr_t end); ++}; ++ ++#endif // OS_LINUX_RISCV_FLUSH_ICACHE_LINUX_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp new file mode 100644 -index 00000000000..3100572e9fd +index 0000000000..ccceed643e --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -@@ -0,0 +1,92 @@ +@@ -0,0 +1,100 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. @@ -57742,6 +53316,7 @@ index 00000000000..3100572e9fd + */ + +#include "precompiled.hpp" ++#include "memory/metaspaceShared.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/thread.inline.hpp" + @@ -57779,16 +53354,23 @@ index 00000000000..3100572e9fd + + intptr_t* ret_fp = NULL; + intptr_t* ret_sp = NULL; -+ address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp); -+ if (addr == NULL || ret_sp == NULL ) { ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { + // ucontext wasn't useful + return false; + } + -+ frame ret_frame(ret_sp, ret_fp, addr); -+ if (!ret_frame.safe_for_sender(this)) { ++ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { ++ // In the middle of a trampoline call. Bail out for safety. ++ // This happens rarely so shouldn't affect profiling. ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(this)) { +#ifdef COMPILER2 -+ frame ret_frame2(ret_sp, NULL, addr); ++ frame ret_frame2(ret_sp, NULL, addr.pc()); + if (!ret_frame2.safe_for_sender(this)) { + // nothing else to try if the frame isn't good + return false; @@ -57810,10 +53392,10 @@ index 00000000000..3100572e9fd +void JavaThread::cache_global_variables() { } diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp new file mode 100644 -index 00000000000..61e2cf85b63 +index 0000000000..4b91fa855a --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -@@ -0,0 +1,48 @@ +@@ -0,0 +1,67 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. @@ -57850,21 +53432,40 @@ index 00000000000..61e2cf85b63 + frame pd_last_frame(); + + public: ++ // Mutators are highly dangerous.... 
++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ + static ByteSize last_Java_fp_offset() { + return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); + } + ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ + bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, + bool isInJava); + + bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); +private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} + +#endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp new file mode 100644 -index 00000000000..6cf7683a586 +index 0000000000..6cf7683a58 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp @@ -0,0 +1,55 @@ @@ -57925,10 +53526,10 @@ index 00000000000..6cf7683a586 +#endif // OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp new file mode 100644 -index 00000000000..4623dbfad42 +index 0000000000..8bcc949fed --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp -@@ -0,0 +1,118 @@ +@@ -0,0 +1,137 @@ +/* + * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. 
@@ -57991,10 +53592,6 @@ index 00000000000..4623dbfad42 +#define HWCAP_ISA_V (1 << ('V' - 'A')) +#endif + -+#ifndef HWCAP_ISA_B -+#define HWCAP_ISA_B (1 << ('B' - 'A')) -+#endif -+ +#define read_csr(csr) \ +({ \ + register unsigned long __v; \ @@ -58010,18 +53607,35 @@ index 00000000000..4623dbfad42 + return (uint32_t)read_csr(CSR_VLENB); +} + ++VM_Version::VM_MODE VM_Version::get_satp_mode() { ++ if (!strcmp(_vm_mode, "sv39")) { ++ return VM_SV39; ++ } else if (!strcmp(_vm_mode, "sv48")) { ++ return VM_SV48; ++ } else if (!strcmp(_vm_mode, "sv57")) { ++ return VM_SV57; ++ } else if (!strcmp(_vm_mode, "sv64")) { ++ return VM_SV64; ++ } else { ++ return VM_MBARE; ++ } ++} ++ +void VM_Version::get_os_cpu_info() { + + uint64_t auxv = getauxval(AT_HWCAP); + -+ static_assert(CPU_I == HWCAP_ISA_I, "Flag CPU_I must follow Linux HWCAP"); -+ static_assert(CPU_M == HWCAP_ISA_M, "Flag CPU_M must follow Linux HWCAP"); -+ static_assert(CPU_A == HWCAP_ISA_A, "Flag CPU_A must follow Linux HWCAP"); -+ static_assert(CPU_F == HWCAP_ISA_F, "Flag CPU_F must follow Linux HWCAP"); -+ static_assert(CPU_D == HWCAP_ISA_D, "Flag CPU_D must follow Linux HWCAP"); -+ static_assert(CPU_C == HWCAP_ISA_C, "Flag CPU_C must follow Linux HWCAP"); -+ static_assert(CPU_V == HWCAP_ISA_V, "Flag CPU_V must follow Linux HWCAP"); -+ static_assert(CPU_B == HWCAP_ISA_B, "Flag CPU_B must follow Linux HWCAP"); ++ STATIC_ASSERT(CPU_I == HWCAP_ISA_I); ++ STATIC_ASSERT(CPU_M == HWCAP_ISA_M); ++ STATIC_ASSERT(CPU_A == HWCAP_ISA_A); ++ STATIC_ASSERT(CPU_F == HWCAP_ISA_F); ++ STATIC_ASSERT(CPU_D == HWCAP_ISA_D); ++ STATIC_ASSERT(CPU_C == HWCAP_ISA_C); ++ STATIC_ASSERT(CPU_V == HWCAP_ISA_V); ++ ++ // RISC-V has four bit-manipulation ISA-extensions: Zba/Zbb/Zbc/Zbs. ++ // Availability for those extensions could not be queried from HWCAP. ++ // TODO: Add proper detection for those extensions. + _features = auxv & ( + HWCAP_ISA_I | + HWCAP_ISA_M | @@ -58029,14 +53643,20 @@ index 00000000000..4623dbfad42 + HWCAP_ISA_F | + HWCAP_ISA_D | + HWCAP_ISA_C | -+ HWCAP_ISA_V | -+ HWCAP_ISA_B); ++ HWCAP_ISA_V); + + if (FILE *f = fopen("/proc/cpuinfo", "r")) { + char buf[512], *p; + while (fgets(buf, sizeof (buf), f) != NULL) { + if ((p = strchr(buf, ':')) != NULL) { -+ if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { ++ if (strncmp(buf, "mmu", sizeof "mmu" - 1) == 0) { ++ if (_vm_mode[0] != '\0') { ++ continue; ++ } ++ char* vm_mode = os::strdup(p + 2); ++ vm_mode[strcspn(vm_mode, "\n")] = '\0'; ++ _vm_mode = vm_mode; ++ } else if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { + char* uarch = os::strdup(p + 2); + uarch[strcspn(uarch, "\n")] = '\0'; + _uarch = uarch; @@ -58048,7 +53668,7 @@ index 00000000000..4623dbfad42 + } +} diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp -index e30d39f73d1..733ee9e654c 100644 +index e30d39f73d..c640c546b1 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -1,5 +1,5 @@ @@ -58058,48 +53678,59 @@ index e30d39f73d1..733ee9e654c 100644 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -199,7 +199,6 @@ bool LIR_OprDesc::is_oop() const { - void LIR_Op2::verify() const { +@@ -200,6 +200,9 @@ void LIR_Op2::verify() const { #ifdef ASSERT switch (code()) { -- case lir_cmove: + case lir_cmove: ++#ifdef RISCV ++ assert(false, "lir_cmove is LIR_Op4 on RISCV"); ++#endif case lir_xchg: break; -@@ -252,9 +251,7 @@ void LIR_Op2::verify() const { +@@ -252,9 +255,13 @@ void LIR_Op2::verify() const { LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) -- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) ++#ifdef RISCV + : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) ++#else + : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + , _cond(cond) + , _type(type) ++#endif , _label(block->label()) , _block(block) , _ublock(NULL) -@@ -262,9 +259,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block +@@ -262,9 +269,13 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block } LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : -- LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) ++#ifdef RISCV + LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) ++#else + LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + , _cond(cond) + , _type(type) ++#endif , _label(stub->entry()) , _block(NULL) , _ublock(NULL) -@@ -272,9 +267,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : +@@ -272,9 +283,13 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : } LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock) -- : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) ++#ifdef RISCV + : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) ++#else + : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + , _cond(cond) + , _type(type) ++#endif , _label(block->label()) , _block(block) , _ublock(ublock) -@@ -296,13 +289,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) { +@@ -296,13 +311,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) { } void LIR_OpBranch::negate_cond() { @@ -58120,61 +53751,66 @@ index e30d39f73d1..733ee9e654c 100644 default: ShouldNotReachHere(); } } -@@ -525,6 +518,13 @@ void LIR_OpVisitState::visit(LIR_Op* op) { +@@ -525,6 +540,15 @@ void LIR_OpVisitState::visit(LIR_Op* op) { assert(op->as_OpBranch() != NULL, "must be"); LIR_OpBranch* opBranch = (LIR_OpBranch*)op; ++#ifdef RISCV + assert(opBranch->_tmp1->is_illegal() && opBranch->_tmp2->is_illegal() && + opBranch->_tmp3->is_illegal() && opBranch->_tmp4->is_illegal() && + opBranch->_tmp5->is_illegal(), "not used"); + + if (opBranch->_opr1->is_valid()) do_input(opBranch->_opr1); + if (opBranch->_opr2->is_valid()) do_input(opBranch->_opr2); ++#endif + if (opBranch->_info != NULL) do_info(opBranch->_info); assert(opBranch->_result->is_illegal(), "not used"); if (opBranch->_stub != NULL) opBranch->stub()->visit(this); -@@ -615,17 +615,19 @@ void LIR_OpVisitState::visit(LIR_Op* op) { +@@ -615,6 +639,21 @@ void LIR_OpVisitState::visit(LIR_Op* op) { // to the result 
operand, otherwise the backend fails case lir_cmove: { -- assert(op->as_Op2() != NULL, "must be"); -- LIR_Op2* op2 = (LIR_Op2*)op; ++#ifdef RISCV + assert(op->as_Op4() != NULL, "must be"); + LIR_Op4* op4 = (LIR_Op4*)op; - -- assert(op2->_info == NULL && op2->_tmp1->is_illegal() && op2->_tmp2->is_illegal() && -- op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used"); -- assert(op2->_opr1->is_valid() && op2->_opr2->is_valid() && op2->_result->is_valid(), "used"); ++ + assert(op4->_info == NULL && op4->_tmp1->is_illegal() && op4->_tmp2->is_illegal() && + op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "not used"); + assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && op4->_result->is_valid(), "used"); - -- do_input(op2->_opr1); -- do_input(op2->_opr2); -- do_temp(op2->_opr2); -- do_output(op2->_result); ++ + do_input(op4->_opr1); + do_input(op4->_opr2); + if (op4->_opr3->is_valid()) do_input(op4->_opr3); + if (op4->_opr4->is_valid()) do_input(op4->_opr4); + do_temp(op4->_opr2); + do_output(op4->_result); ++#else + assert(op->as_Op2() != NULL, "must be"); + LIR_Op2* op2 = (LIR_Op2*)op; + +@@ -626,6 +665,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + do_input(op2->_opr2); + do_temp(op2->_opr2); + do_output(op2->_result); ++#endif break; } -@@ -1048,6 +1050,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { +@@ -1048,6 +1088,12 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { masm->emit_op3(this); } ++#ifdef RISCV +void LIR_Op4::emit_code(LIR_Assembler* masm) { + masm->emit_op4(this); +} ++#endif + void LIR_OpLock::emit_code(LIR_Assembler* masm) { masm->emit_lock(this); if (stub()) { -@@ -1084,6 +1090,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) +@@ -1084,6 +1130,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) , _file(NULL) , _line(0) #endif @@ -58185,7 +53821,7 @@ index e30d39f73d1..733ee9e654c 100644 { } -@@ -1101,6 +1111,38 @@ void LIR_List::set_file_and_line(const char * file, int line) { +@@ -1101,6 +1151,38 @@ void LIR_List::set_file_and_line(const char * file, int line) { } #endif @@ -58224,45 +53860,34 @@ index e30d39f73d1..733ee9e654c 100644 void LIR_List::append(LIR_InsertionBuffer* buffer) { assert(this == buffer->lir_list(), "wrong lir list"); -@@ -1680,7 +1722,6 @@ const char * LIR_Op::name() const { - case lir_cmp_l2i: s = "cmp_l2i"; break; - case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; - case lir_cmp_fd2i: s = "comp_fd2i"; break; -- case lir_cmove: s = "cmove"; break; - case lir_add: s = "add"; break; - case lir_sub: s = "sub"; break; - case lir_mul: s = "mul"; break; -@@ -1705,6 +1746,8 @@ const char * LIR_Op::name() const { - case lir_irem: s = "irem"; break; - case lir_fmad: s = "fmad"; break; - case lir_fmaf: s = "fmaf"; break; -+ // LIR_Op4 -+ case lir_cmove: s = "cmove"; break; - // LIR_OpJavaCall - case lir_static_call: s = "static"; break; - case lir_optvirtual_call: s = "optvirtual"; break; -@@ -1841,6 +1884,8 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { +@@ -1841,6 +1923,10 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { // LIR_OpBranch void LIR_OpBranch::print_instr(outputStream* out) const { print_condition(out, cond()); out->print(" "); ++#ifdef RISCV + in_opr1()->print(out); out->print(" "); + in_opr2()->print(out); out->print(" "); ++#endif if (block() != NULL) { out->print("[B%d] ", block()->block_id()); } else if (stub() != NULL) { -@@ -1927,7 +1972,7 @@ void 
LIR_OpRoundFP::print_instr(outputStream* out) const { +@@ -1927,7 +2013,11 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { // LIR_Op2 void LIR_Op2::print_instr(outputStream* out) const { -- if (code() == lir_cmove || code() == lir_cmp) { ++#ifdef RISCV + if (code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch) { ++#else + if (code() == lir_cmove || code() == lir_cmp) { ++#endif print_condition(out, condition()); out->print(" "); } in_opr1()->print(out); out->print(" "); -@@ -1978,6 +2023,15 @@ void LIR_Op3::print_instr(outputStream* out) const { +@@ -1978,6 +2068,17 @@ void LIR_Op3::print_instr(outputStream* out) const { result_opr()->print(out); } ++#ifdef RISCV +// LIR_Op4 +void LIR_Op4::print_instr(outputStream* out) const { + print_condition(out, condition()); out->print(" "); @@ -58272,11 +53897,12 @@ index e30d39f73d1..733ee9e654c 100644 + in_opr4()->print(out); out->print(" "); + result_opr()->print(out); +} ++#endif void LIR_OpLock::print_instr(outputStream* out) const { hdr_opr()->print(out); out->print(" "); diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp -index 3234ca018b7..efff6bf7a30 100644 +index 3234ca018b..33943e369d 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -1,5 +1,5 @@ @@ -58286,52 +53912,62 @@ index 3234ca018b7..efff6bf7a30 100644 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -867,6 +867,7 @@ class LIR_Op2; +@@ -867,6 +867,9 @@ class LIR_Op2; class LIR_OpDelay; class LIR_Op3; class LIR_OpAllocArray; ++#ifdef RISCV +class LIR_Op4; ++#endif class LIR_OpCall; class LIR_OpJavaCall; class LIR_OpRTCall; -@@ -916,8 +917,6 @@ enum LIR_Code { +@@ -916,8 +919,10 @@ enum LIR_Code { , lir_null_check , lir_return , lir_leal -- , lir_branch -- , lir_cond_float_branch ++#ifndef RISCV + , lir_branch + , lir_cond_float_branch ++#endif , lir_move , lir_convert , lir_alloc_object -@@ -929,11 +928,12 @@ enum LIR_Code { +@@ -929,11 +934,17 @@ enum LIR_Code { , lir_unwind , end_op1 , begin_op2 ++#ifdef RISCV + , lir_branch + , lir_cond_float_branch ++#endif , lir_cmp , lir_cmp_l2i , lir_ucmp_fd2i , lir_cmp_fd2i -- , lir_cmove ++#ifndef RISCV + , lir_cmove ++#endif , lir_add , lir_sub , lir_mul -@@ -964,6 +964,9 @@ enum LIR_Code { +@@ -964,6 +975,11 @@ enum LIR_Code { , lir_fmad , lir_fmaf , end_op3 ++#ifdef RISCV + , begin_op4 + , lir_cmove + , end_op4 ++#endif , begin_opJavaCall , lir_static_call , lir_optvirtual_call -@@ -1001,6 +1004,11 @@ enum LIR_Code { +@@ -1001,6 +1017,11 @@ enum LIR_Code { , begin_opAssert , lir_assert , end_opAssert -+#ifdef INCLUDE_ZGC ++#if defined(RISCV) && defined(INCLUDE_ZGC) + , begin_opZLoadBarrierTest + , lir_zloadbarrier_test + , end_opZLoadBarrierTest @@ -58339,15 +53975,17 @@ index 3234ca018b7..efff6bf7a30 100644 }; -@@ -1134,6 +1142,7 @@ class LIR_Op: public CompilationResourceObj { +@@ -1134,6 +1155,9 @@ class LIR_Op: public CompilationResourceObj { virtual LIR_Op1* as_Op1() { return NULL; } virtual LIR_Op2* as_Op2() { return NULL; } virtual LIR_Op3* as_Op3() { return NULL; } ++#ifdef RISCV + virtual LIR_Op4* as_Op4() { return NULL; } ++#endif virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } -@@ -1410,51 +1419,6 @@ class LIR_OpRTCall: public LIR_OpCall { +@@ -1410,51 +1434,6 @@ class LIR_OpRTCall: public LIR_OpCall { 
virtual void verify() const; }; @@ -58399,7 +54037,7 @@ index 3234ca018b7..efff6bf7a30 100644 class ConversionStub; class LIR_OpConvert: public LIR_Op1 { -@@ -1614,19 +1578,19 @@ class LIR_Op2: public LIR_Op { +@@ -1614,19 +1593,19 @@ class LIR_Op2: public LIR_Op { void verify() const; public: @@ -58420,11 +54058,11 @@ index 3234ca018b7..efff6bf7a30 100644 - assert(code == lir_cmp || code == lir_assert, "code check"); + , _tmp5(LIR_OprFact::illegalOpr) + , _condition(condition) { -+ assert(code == lir_cmp || code == lir_branch || code == lir_cond_float_branch || code == lir_assert, "code check"); ++ assert(code == lir_cmp || code == lir_assert RISCV_ONLY(|| code == lir_branch || code == lir_cond_float_branch), "code check"); } LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) -@@ -1651,14 +1615,14 @@ class LIR_Op2: public LIR_Op { +@@ -1651,14 +1630,14 @@ class LIR_Op2: public LIR_Op { , _opr1(opr1) , _opr2(opr2) , _type(type) @@ -58438,11 +54076,11 @@ index 3234ca018b7..efff6bf7a30 100644 - assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); + , _tmp5(LIR_OprFact::illegalOpr) + , _condition(lir_cond_unknown) { -+ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert(code != lir_cmp && RISCV_ONLY(code != lir_branch && code != lir_cond_float_branch &&) is_in_range(code, begin_op2, end_op2), "code check"); } LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, -@@ -1667,14 +1631,14 @@ class LIR_Op2: public LIR_Op { +@@ -1667,14 +1646,14 @@ class LIR_Op2: public LIR_Op { , _opr1(opr1) , _opr2(opr2) , _type(T_ILLEGAL) @@ -58456,31 +54094,45 @@ index 3234ca018b7..efff6bf7a30 100644 - assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); + , _tmp5(tmp5) + , _condition(lir_cond_unknown) { -+ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert(code != lir_cmp && RISCV_ONLY(code != lir_branch && code != lir_cond_float_branch &&) is_in_range(code, begin_op2, end_op2), "code check"); } LIR_Opr in_opr1() const { return _opr1; } -@@ -1686,10 +1650,10 @@ class LIR_Op2: public LIR_Op { +@@ -1686,10 +1665,18 @@ class LIR_Op2: public LIR_Op { LIR_Opr tmp4_opr() const { return _tmp4; } LIR_Opr tmp5_opr() const { return _tmp5; } LIR_Condition condition() const { -- assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; ++#ifdef RISCV + assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; ++#else + assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; ++#endif } void set_condition(LIR_Condition condition) { -- assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; ++#ifdef RISCV + assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for branch"); _condition = condition; ++#else + assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; ++#endif } void set_fpu_stack_size(int size) { _fpu_stack_size = size; } -@@ -1703,6 +1667,51 @@ class 
LIR_Op2: public LIR_Op { +@@ -1703,6 +1690,65 @@ class LIR_Op2: public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; ++#ifdef RISCV +class LIR_OpBranch: public LIR_Op2 { ++#else ++class LIR_OpBranch: public LIR_Op { ++#endif + friend class LIR_OpVisitState; + + private: ++#ifndef RISCV ++ LIR_Condition _cond; ++ BasicType _type; ++#endif + Label* _label; + BlockBegin* _block; // if this is a branch to a block, this is the block + BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block @@ -58488,7 +54140,13 @@ index 3234ca018b7..efff6bf7a30 100644 + + public: + LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) ++#ifdef RISCV + : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL, type) ++#else ++ : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) ++ , _cond(cond) ++ , _type(type) ++#endif + , _label(lbl) + , _block(NULL) + , _ublock(NULL) @@ -58500,14 +54158,14 @@ index 3234ca018b7..efff6bf7a30 100644 + // for unordered comparisons + LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock); + -+ LIR_Condition cond() const { -+ return condition(); -+ } -+ -+ void set_cond(LIR_Condition cond) { -+ set_condition(cond); -+ } -+ ++#ifdef RISCV ++ LIR_Condition cond() const { return condition(); } ++ void set_cond(LIR_Condition cond) { set_condition(cond); } ++#else ++ LIR_Condition cond() const { return _cond; } ++ void set_cond(LIR_Condition cond) { _cond = cond; } ++#endif ++ BasicType type() const { return _type; } + Label* label() const { return _label; } + BlockBegin* block() const { return _block; } + BlockBegin* ublock() const { return _ublock; } @@ -58525,10 +54183,11 @@ index 3234ca018b7..efff6bf7a30 100644 class LIR_OpAllocArray : public LIR_Op { friend class LIR_OpVisitState; -@@ -1766,6 +1775,63 @@ class LIR_Op3: public LIR_Op { +@@ -1766,6 +1812,65 @@ class LIR_Op3: public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; ++#ifdef RISCV +class LIR_Op4: public LIR_Op { + friend class LIR_OpVisitState; + protected: @@ -58586,10 +54245,11 @@ index 3234ca018b7..efff6bf7a30 100644 + + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +}; ++#endif //-------------------------------- class LabelObj: public CompilationResourceObj { -@@ -1988,6 +2054,10 @@ class LIR_List: public CompilationResourceObj { +@@ -1988,6 +2093,10 @@ class LIR_List: public CompilationResourceObj { const char * _file; int _line; #endif @@ -58600,7 +54260,7 @@ index 3234ca018b7..efff6bf7a30 100644 public: void append(LIR_Op* op) { -@@ -2000,6 +2070,12 @@ class LIR_List: public CompilationResourceObj { +@@ -2000,6 +2109,12 @@ class LIR_List: public CompilationResourceObj { } #endif // PRODUCT @@ -58613,7 +54273,7 @@ index 3234ca018b7..efff6bf7a30 100644 _operations.append(op); #ifdef ASSERT -@@ -2016,6 +2092,10 @@ class LIR_List: public CompilationResourceObj { +@@ -2016,6 +2131,10 @@ class LIR_List: public CompilationResourceObj { void set_file_and_line(const char * file, int line); #endif @@ -58624,37 +54284,44 @@ index 3234ca018b7..efff6bf7a30 100644 //---------- accessors --------------- LIR_OpList* instructions_list() { return &_operations; } int length() const { return _operations.length(); } -@@ -2149,8 +2229,9 @@ class LIR_List: public CompilationResourceObj { +@@ -2149,9 +2268,16 @@ class LIR_List: public CompilationResourceObj { void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, 
CodeEmitInfo* info); void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); -- void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { -- append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); ++#ifdef RISCV + void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type, + LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr) { + append(new LIR_Op4(lir_cmove, condition, src1, src2, cmp_opr1, cmp_opr2, dst, type)); ++ } ++#else + void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { + append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); } ++#endif void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, + LIR_Opr t1, LIR_Opr t2, LIR_Opr result = LIR_OprFact::illegalOpr); diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp -index 160483d5f74..42a0350f7d9 100644 +index 160483d5f7..68aec26c1e 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp -@@ -709,10 +709,6 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { +@@ -709,9 +709,11 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op); break; -- case lir_cmove: -- cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type()); -- break; -- ++#ifndef RISCV + case lir_cmove: + cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type()); + break; ++#endif + case lir_shl: case lir_shr: - case lir_ushr: -@@ -776,6 +772,17 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { +@@ -776,6 +778,19 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { } } ++#ifdef RISCV +void LIR_Assembler::emit_op4(LIR_Op4* op) { + switch(op->code()) { + case lir_cmove: @@ -58666,34 +54333,40 @@ index 160483d5f74..42a0350f7d9 100644 + break; + } +} ++#endif void LIR_Assembler::build_frame() { _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp -index 44a5bcbe542..c677bd346fc 100644 +index 44a5bcbe54..baeb4aa442 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp -@@ -190,6 +190,7 @@ class LIR_Assembler: public CompilationResourceObj { +@@ -190,6 +190,9 @@ class LIR_Assembler: public CompilationResourceObj { void emit_op1(LIR_Op1* op); void emit_op2(LIR_Op2* op); void emit_op3(LIR_Op3* op); ++#ifdef RISCV + void emit_op4(LIR_Op4* op); ++#endif void emit_opBranch(LIR_OpBranch* op); void emit_opLabel(LIR_OpLabel* op); void emit_arraycopy(LIR_OpArrayCopy* op); -@@ -222,8 +223,8 @@ class LIR_Assembler: public CompilationResourceObj { +@@ -222,8 +225,12 @@ class LIR_Assembler: public CompilationResourceObj { void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); -- void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); -- ++#ifdef RISCV + void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr); ++#else + void 
cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); +- ++#endif void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); void ic_call( LIR_OpJavaCall* op); void vtable_call( LIR_OpJavaCall* op); diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp -index c28055fd996..a4dfe8552ae 100644 +index acc969ac9c..512b63c744 100644 --- a/src/hotspot/share/c1/c1_LinearScan.cpp +++ b/src/hotspot/share/c1/c1_LinearScan.cpp @@ -1,5 +1,5 @@ @@ -58703,22 +54376,21 @@ index c28055fd996..a4dfe8552ae 100644 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -1242,11 +1242,11 @@ void LinearScan::add_register_hints(LIR_Op* op) { +@@ -1242,8 +1242,13 @@ void LinearScan::add_register_hints(LIR_Op* op) { break; } case lir_cmove: { -- assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2"); -- LIR_Op2* cmove = (LIR_Op2*)op; ++#ifdef RISCV + assert(op->as_Op4() != NULL, "lir_cmove must be LIR_Op4"); + LIR_Op4* cmove = (LIR_Op4*)op; ++#else + assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2"); + LIR_Op2* cmove = (LIR_Op2*)op; ++#endif LIR_Opr move_from = cmove->in_opr1(); -- LIR_Opr move_to = cmove->result_opr(); -+ LIR_Opr move_to = cmove->result_opr(); - - if (move_to->is_register() && move_from->is_register()) { - Interval* from = interval_at(reg_num(move_from)); -@@ -3140,6 +3140,9 @@ void LinearScan::do_linear_scan() { + LIR_Opr move_to = cmove->result_opr(); +@@ -3148,6 +3153,9 @@ void LinearScan::do_linear_scan() { } } @@ -58728,7 +54400,7 @@ index c28055fd996..a4dfe8552ae 100644 { TIME_LINEAR_SCAN(timer_optimize_lir); EdgeMoveOptimizer::optimize(ir()->code()); -@@ -3147,6 +3150,7 @@ void LinearScan::do_linear_scan() { +@@ -3155,6 +3163,7 @@ void LinearScan::do_linear_scan() { // check that cfg is still correct after optimizations ir()->verify(); } @@ -58736,26 +54408,32 @@ index c28055fd996..a4dfe8552ae 100644 NOT_PRODUCT(print_lir(1, "Before Code Generation", false)); NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_final)); -@@ -6284,14 +6288,14 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { +@@ -6292,14 +6301,23 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { // There might be a cmove inserted for profiling which depends on the same // compare. If we change the condition of the respective compare, we have // to take care of this cmove as well. 
-- LIR_Op2* prev_cmove = NULL; ++#ifdef RISCV + LIR_Op4* prev_cmove = NULL; ++#else + LIR_Op2* prev_cmove = NULL; ++#endif for(int j = instructions->length() - 3; j >= 0 && prev_cmp == NULL; j--) { prev_op = instructions->at(j); // check for the cmove if (prev_op->code() == lir_cmove) { -- assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2"); -- prev_cmove = (LIR_Op2*)prev_op; ++#ifdef RISCV + assert(prev_op->as_Op4() != NULL, "cmove must be of type LIR_Op4"); + prev_cmove = (LIR_Op4*)prev_op; ++#else + assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2"); + prev_cmove = (LIR_Op2*)prev_op; ++#endif assert(prev_branch->cond() == prev_cmove->condition(), "should be the same"); } if (prev_op->code() == lir_cmp) { diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp -index 4771a8b8652..6d377fa005d 100644 +index 4771a8b865..6d377fa005 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp @@ -1,5 +1,5 @@ @@ -58775,7 +54453,7 @@ index 4771a8b8652..6d377fa005d 100644 #endif diff --git a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp -index 9f8ce742433..f36dd612eff 100644 +index 9f8ce74243..125cc169be 100644 --- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp @@ -1,5 +1,5 @@ @@ -58785,17 +54463,20 @@ index 9f8ce742433..f36dd612eff 100644 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -100,7 +100,7 @@ class LIR_OpZLoadBarrierTest : public LIR_Op { +@@ -100,7 +100,11 @@ private: public: LIR_OpZLoadBarrierTest(LIR_Opr opr) : -- LIR_Op(), ++#ifdef RISCV + LIR_Op(lir_zloadbarrier_test, LIR_OprFact::illegalOpr, NULL), ++#else + LIR_Op(), ++#endif _opr(opr) {} virtual void visit(LIR_OpVisitState* state) { diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp -index e01a242a57e..ff16de0e778 100644 +index e01a242a57..ff16de0e77 100644 --- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp @@ -102,7 +102,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { @@ -58807,19 +54488,8 @@ index e01a242a57e..ff16de0e778 100644 return false; #else #warning "Unconfigured platform" -diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp -index c64d0879592..bc856d4b617 100644 ---- a/src/hotspot/share/opto/regmask.hpp -+++ b/src/hotspot/share/opto/regmask.hpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/share/runtime/abstract_vm_version.cpp b/src/hotspot/share/runtime/abstract_vm_version.cpp -index c46247f2bdb..b5e64b65ff7 100644 +index a383297611..5e9228e705 100644 --- a/src/hotspot/share/runtime/abstract_vm_version.cpp +++ b/src/hotspot/share/runtime/abstract_vm_version.cpp @@ -196,7 +196,8 @@ const char* Abstract_VM_Version::jre_release_version() { @@ -58832,22 +54502,11 @@ index c46247f2bdb..b5e64b65ff7 100644 #endif // !ZERO #endif // !CPU -diff --git a/src/hotspot/share/runtime/synchronizer.cpp b/src/hotspot/share/runtime/synchronizer.cpp -index e7b32723e47..434826853ee 100644 ---- a/src/hotspot/share/runtime/synchronizer.cpp -+++ b/src/hotspot/share/runtime/synchronizer.cpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp -index aa914eccafc..a2f98e6a251 100644 +index 34c8d98362..7cf95058fe 100644 --- a/src/hotspot/share/runtime/thread.hpp +++ b/src/hotspot/share/runtime/thread.hpp -@@ -1234,7 +1234,7 @@ class JavaThread: public Thread { +@@ -1259,7 +1259,7 @@ class JavaThread: public Thread { address last_Java_pc(void) { return _anchor.last_Java_pc(); } // Safepoint support @@ -58857,7 +54516,7 @@ index aa914eccafc..a2f98e6a251 100644 void set_thread_state(JavaThreadState s) { assert(current_or_null() == NULL || current_or_null() == this, diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp -index dee8534f739..9af07aeb459 100644 +index dee8534f73..9af07aeb45 100644 --- a/src/hotspot/share/runtime/thread.inline.hpp +++ b/src/hotspot/share/runtime/thread.inline.hpp @@ -1,5 +1,5 @@ @@ -58877,10 +54536,10 @@ index dee8534f739..9af07aeb459 100644 return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state); } diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp -index cf802538689..e8ab3097ac7 100644 +index 6605ab367c..7f1bcff6b3 100644 --- a/src/hotspot/share/utilities/macros.hpp +++ b/src/hotspot/share/utilities/macros.hpp -@@ -597,6 +597,32 @@ +@@ -601,6 +601,32 @@ #define MACOS_AARCH64_ONLY(x) MACOS_ONLY(AARCH64_ONLY(x)) @@ -58914,7 +54573,7 @@ index cf802538689..e8ab3097ac7 100644 #define LITTLE_ENDIAN_ONLY(code) code #define BIG_ENDIAN_ONLY(code) diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -index 0d834302c57..45a927fb5ee 100644 +index 0d834302c5..45a927fb5e 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c @@ -1,5 +1,5 @@ @@ -59000,7 +54659,7 @@ index 0d834302c57..45a927fb5ee 100644 #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h -index 8318e8e0213..ab092d4ee33 100644 +index 8318e8e021..ab092d4ee3 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h @@ -1,5 +1,5 @@ @@ -59020,10 
+54679,10 @@ index 8318e8e0213..ab092d4ee33 100644 // This C bool type must be int for compatibility with Linux calls and diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -index 0f5f0119c73..9bff9ee9b15 100644 +index 0f5f0119c7..9bff9ee9b1 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -@@ -36,6 +36,7 @@ +@@ -36,6 +36,7 @@ import sun.jvm.hotspot.debugger.MachineDescription; import sun.jvm.hotspot.debugger.MachineDescriptionAMD64; import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; @@ -59031,7 +54690,7 @@ index 0f5f0119c73..9bff9ee9b15 100644 import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; -@@ -598,6 +599,8 @@ private void setupDebuggerLinux() { +@@ -598,6 +599,8 @@ public class HotSpotAgent { } else { machDesc = new MachineDescriptionSPARC32Bit(); } @@ -59042,7 +54701,7 @@ index 0f5f0119c73..9bff9ee9b15 100644 machDesc = (MachineDescription) diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java new file mode 100644 -index 00000000000..a972516dee3 +index 0000000000..a972516dee --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java @@ -0,0 +1,40 @@ @@ -59087,7 +54746,7 @@ index 00000000000..a972516dee3 + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -index 5e5a6bb7141..dc0bcb3da94 100644 +index 5e5a6bb714..dc0bcb3da9 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java @@ -1,5 +1,5 @@ @@ -59097,7 +54756,7 @@ index 5e5a6bb7141..dc0bcb3da94 100644 * Copyright (c) 2015, Red Hat Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* -@@ -34,12 +34,14 @@ +@@ -34,12 +34,14 @@ import sun.jvm.hotspot.debugger.x86.*; import sun.jvm.hotspot.debugger.amd64.*; import sun.jvm.hotspot.debugger.aarch64.*; import sun.jvm.hotspot.debugger.sparc.*; @@ -59112,7 +54771,7 @@ index 5e5a6bb7141..dc0bcb3da94 100644 import sun.jvm.hotspot.utilities.*; class LinuxCDebugger implements CDebugger { -@@ -116,7 +118,14 @@ public CFrame topFrameForThread(ThreadProxy thread) throws DebuggerException { +@@ -116,7 +118,14 @@ class LinuxCDebugger implements CDebugger { Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); if (pc == null) return null; return new LinuxAARCH64CFrame(dbg, fp, pc); @@ -59130,7 +54789,7 @@ index 5e5a6bb7141..dc0bcb3da94 100644 return context.getTopFrame(dbg); diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java new file mode 100644 -index 00000000000..f06da24bd0e +index 0000000000..f06da24bd0 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java @@ -0,0 +1,90 @@ @@ -59226,7 +54885,7 @@ index 00000000000..f06da24bd0e +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java new file mode 100644 -index 00000000000..fdb841ccf3d +index 0000000000..fdb841ccf3 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java @@ -0,0 +1,48 @@ @@ -59280,7 +54939,7 @@ index 00000000000..fdb841ccf3d +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java new file mode 100644 -index 00000000000..96d5dee47ce +index 0000000000..96d5dee47c --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java @@ -0,0 +1,88 @@ @@ -59374,7 +55033,7 @@ index 00000000000..96d5dee47ce +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java new file mode 100644 -index 00000000000..f2aa845e665 +index 0000000000..f2aa845e66 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java @@ -0,0 +1,48 @@ @@ -59428,7 +55087,7 @@ index 00000000000..f2aa845e665 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java new file mode 100644 -index 00000000000..19f64b8ce2d +index 0000000000..19f64b8ce2 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java @@ -0,0 +1,46 @@ @@ -59480,7 +55139,7 @@ index 00000000000..19f64b8ce2d +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java new file mode 100644 -index 00000000000..aecbda59023 +index 0000000000..aecbda5902 --- 
/dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java @@ -0,0 +1,55 @@ @@ -59541,7 +55200,7 @@ index 00000000000..aecbda59023 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java new file mode 100644 -index 00000000000..1d3da6be5af +index 0000000000..1d3da6be5a --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java @@ -0,0 +1,48 @@ @@ -59595,7 +55254,7 @@ index 00000000000..1d3da6be5af +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java new file mode 100644 -index 00000000000..725b94e25a3 +index 0000000000..725b94e25a --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java @@ -0,0 +1,46 @@ @@ -59645,11 +55304,11 @@ index 00000000000..725b94e25a3 + return new RemoteRISCV64Thread(debugger, id); + } +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java new file mode 100644 -index 00000000000..fb60a70427a +index 0000000000..fb60a70427 --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. @@ -59824,7 +55483,7 @@ index 00000000000..fb60a70427a + public abstract Address getRegisterAsAddress(int index); +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java -index 190062785a7..89d676fe3b9 100644 +index 190062785a..89d676fe3b 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java @@ -1,5 +1,5 @@ @@ -59834,7 +55493,7 @@ index 190062785a7..89d676fe3b9 100644 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -38,6 +38,7 @@ +@@ -38,6 +38,7 @@ import sun.jvm.hotspot.runtime.win32_aarch64.Win32AARCH64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; @@ -59842,7 +55501,7 @@ index 190062785a7..89d676fe3b9 100644 import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; -@@ -99,6 +100,8 @@ private static synchronized void initialize(TypeDataBase db) { +@@ -99,6 +100,8 @@ public class Threads { access = new LinuxPPC64JavaThreadPDAccess(); } else if (cpu.equals("aarch64")) { access = new LinuxAARCH64JavaThreadPDAccess(); @@ -59853,10 +55512,10 @@ index 190062785a7..89d676fe3b9 100644 access = (JavaThreadPDAccess) diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java new file mode 100644 -index 00000000000..f2e224f28ee +index 0000000000..5c2b6e0e3e --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java -@@ -0,0 +1,134 @@ +@@ -0,0 +1,132 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. @@ -59893,8 +55552,6 @@ index 00000000000..f2e224f28ee +import sun.jvm.hotspot.runtime.riscv64.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; -+import sun.jvm.hotspot.utilities.Observable; -+import sun.jvm.hotspot.utilities.Observer; + +public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField lastJavaFPField; @@ -59993,7 +55650,7 @@ index 00000000000..f2e224f28ee +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java new file mode 100644 -index 00000000000..34701c6922f +index 0000000000..34701c6922 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java @@ -0,0 +1,223 @@ @@ -60222,10 +55879,10 @@ index 00000000000..34701c6922f +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java new file mode 100644 -index 00000000000..df280005d72 +index 0000000000..e372bc5f7b --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java -@@ -0,0 +1,556 @@ +@@ -0,0 +1,554 @@ +/* + * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Red Hat Inc. @@ -60262,8 +55919,6 @@ index 00000000000..df280005d72 +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; -+import sun.jvm.hotspot.utilities.Observable; -+import sun.jvm.hotspot.utilities.Observer; + +/** Specialization of and implementation of abstract methods of the + Frame class for the riscv64 family of CPUs. 
*/ @@ -60596,19951 +56251,375 @@ index 00000000000..df280005d72 + } + + private Frame senderForCompiledFrame(RISCV64RegisterMap map, CodeBlob cb) { -+ if (DEBUG) { -+ System.out.println("senderForCompiledFrame"); -+ } -+ -+ // -+ // NOTE: some of this code is (unfortunately) duplicated RISCV64CurrentFrameGuess -+ // -+ -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(map != null, "map must be set"); -+ } -+ -+ // frame owned by optimizing compiler -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); -+ } -+ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); -+ -+ // The return_address is always the word on the stack -+ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); -+ -+ // This is the saved value of FP which may or may not really be an FP. -+ // It is only an FP if the sender is an interpreter frame. -+ Address savedFPAddr = senderSP.addOffsetTo(-2 * VM.getVM().getAddressSize()); -+ -+ if (map.getUpdateMap()) { -+ // Tell GC to use argument oopmaps for some runtime stubs that need it. -+ // For C1, the runtime stub might not have oop maps, so set this flag -+ // outside of update_register_map. -+ map.setIncludeArgumentOops(cb.callerMustGCArguments()); -+ -+ if (cb.getOopMaps() != null) { -+ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); -+ } -+ -+ // Since the prolog does the save and restore of FP there is no oopmap -+ // for it so we must fill in its location as if there was an oopmap entry -+ // since if our caller was compiled code there could be live jvm state in it. -+ updateMapWithSavedLink(map, savedFPAddr); -+ } -+ -+ return new RISCV64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); -+ } -+ -+ protected boolean hasSenderPD() { -+ return true; -+ } -+ -+ public long frameSize() { -+ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); -+ } -+ -+ public Address getLink() { -+ try { -+ if (DEBUG) { -+ System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) -+ + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); -+ } -+ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); -+ } catch (Exception e) { -+ if (DEBUG) -+ System.out.println("Returning null"); -+ return null; -+ } -+ } -+ -+ public Address getUnextendedSP() { return raw_unextendedSP; } -+ -+ // Return address: -+ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } -+ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } -+ -+ // return address of param, zero origin index. -+ public Address getNativeParamAddr(int idx) { -+ return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); -+ } -+ -+ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } -+ -+ public Address addressOfInterpreterFrameLocals() { -+ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); -+ } -+ -+ private Address addressOfInterpreterFrameBCX() { -+ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); -+ } -+ -+ public int getInterpreterFrameBCI() { -+ // FIXME: this is not atomic with respect to GC and is unsuitable -+ // for use in a non-debugging, or reflective, system. Need to -+ // figure out how to express this. 
-+ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); -+ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); -+ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); -+ return bcpToBci(bcp, method); -+ } -+ -+ public Address addressOfInterpreterFrameMDX() { -+ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); -+ } -+ -+ // expression stack -+ // (the max_stack arguments are used by the GC; see class FrameClosure) -+ -+ public Address addressOfInterpreterFrameExpressionStack() { -+ Address monitorEnd = interpreterFrameMonitorEnd().address(); -+ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); -+ } -+ -+ public int getInterpreterFrameExpressionStackDirection() { return -1; } -+ -+ // top of expression stack -+ public Address addressOfInterpreterFrameTOS() { -+ return getSP(); -+ } -+ -+ /** Expression stack from top down */ -+ public Address addressOfInterpreterFrameTOSAt(int slot) { -+ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); -+ } -+ -+ public Address getInterpreterFrameSenderSP() { -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(isInterpretedFrame(), "interpreted frame expected"); -+ } -+ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); -+ } -+ -+ // Monitors -+ public BasicObjectLock interpreterFrameMonitorBegin() { -+ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); -+ } -+ -+ public BasicObjectLock interpreterFrameMonitorEnd() { -+ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); -+ if (Assert.ASSERTS_ENABLED) { -+ // make sure the pointer points inside the frame -+ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); -+ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); -+ } -+ return new BasicObjectLock(result); -+ } -+ -+ public int interpreterFrameMonitorSize() { -+ return BasicObjectLock.size(); -+ } -+ -+ // Method -+ public Address addressOfInterpreterFrameMethod() { -+ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); -+ } -+ -+ // Constant pool cache -+ public Address addressOfInterpreterFrameCPCache() { -+ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); -+ } -+ -+ // Entry frames -+ public JavaCallWrapper getEntryFrameCallWrapper() { -+ return new RISCV64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); -+ } -+ -+ protected Address addressOfSavedOopResult() { -+ // offset is 2 for compiler2 and 3 for compiler1 -+ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * -+ VM.getVM().getAddressSize()); -+ } -+ -+ protected Address addressOfSavedReceiver() { -+ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); -+ } -+ -+ private void dumpStack() { -+ for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); -+ AddressOps.lt(addr, getSP()); -+ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { -+ System.out.println(addr + ": " + addr.getAddressAt(0)); -+ } -+ System.out.println("-----------------------"); -+ for (Address addr = getSP(); -+ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); -+ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { -+ System.out.println(addr + ": " + addr.getAddressAt(0)); -+ } -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -new file mode 100644 -index 00000000000..d0ad2b559a6 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+package sun.jvm.hotspot.runtime.riscv64; -+ -+import java.util.*; -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.types.*; -+import sun.jvm.hotspot.runtime.*; -+import sun.jvm.hotspot.utilities.*; -+import sun.jvm.hotspot.utilities.Observable; -+import sun.jvm.hotspot.utilities.Observer; -+ -+public class RISCV64JavaCallWrapper extends JavaCallWrapper { -+ private static AddressField lastJavaFPField; -+ -+ static { -+ VM.registerVMInitializedObserver(new Observer() { -+ public void update(Observable o, Object data) { -+ initialize(VM.getVM().getTypeDataBase()); -+ } -+ }); -+ } -+ -+ private static synchronized void initialize(TypeDataBase db) { -+ Type type = db.lookupType("JavaFrameAnchor"); -+ -+ lastJavaFPField = type.getAddressField("_last_Java_fp"); -+ } -+ -+ public RISCV64JavaCallWrapper(Address addr) { -+ super(addr); -+ } -+ -+ public Address getLastJavaFP() { -+ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java -new file mode 100644 -index 00000000000..4aeb1c6f557 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java -@@ -0,0 +1,53 @@ -+/* -+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+package sun.jvm.hotspot.runtime.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.runtime.*; -+ -+public class RISCV64RegisterMap extends RegisterMap { -+ -+ /** This is the only public constructor */ -+ public RISCV64RegisterMap(JavaThread thread, boolean updateMap) { -+ super(thread, updateMap); -+ } -+ -+ protected RISCV64RegisterMap(RegisterMap map) { -+ super(map); -+ } -+ -+ public Object clone() { -+ RISCV64RegisterMap retval = new RISCV64RegisterMap(this); -+ return retval; -+ } -+ -+ // no PD state to clear or copy: -+ protected void clearPD() {} -+ protected void initializePD() {} -+ protected void initializeFromPD(RegisterMap map) {} -+ protected Address getLocationPD(VMReg reg) { return null; } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -index 7d7a6107cab..6552ce255fc 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -54,7 +54,7 @@ public static String getOS() throws UnsupportedPlatformException { - - public static boolean knownCPU(String cpu) { - final String[] KNOWN = -- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; -+ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "riscv64"}; - - for(String s : KNOWN) { - if(s.equals(cpu)) -diff --git a/test/hotspot/jtreg/compiler/c2/TestBit.java b/test/hotspot/jtreg/compiler/c2/TestBit.java -index 7805918c28a..823b9f39dbf 100644 ---- a/test/hotspot/jtreg/compiler/c2/TestBit.java -+++ b/test/hotspot/jtreg/compiler/c2/TestBit.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -34,7 +34,7 @@ - * - * @run driver compiler.c2.TestBit - * -- * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" -+ * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" | os.arch == "riscv64" - * @requires vm.debug == true & vm.compiler2.enabled - */ - public class TestBit { -@@ -54,7 +54,8 @@ static void runTest(String testName) throws Exception { - String expectedTestBitInstruction = - "ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" : - "aarch64".equals(System.getProperty("os.arch")) ? "tb" : -- "amd64".equals(System.getProperty("os.arch")) ? "test" : null; -+ "amd64".equals(System.getProperty("os.arch")) ? "test" : -+ "riscv64".equals(System.getProperty("os.arch")) ? 
"andi" : null; - - if (expectedTestBitInstruction != null) { - output.shouldContain(expectedTestBitInstruction); -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -index 558b4218f0b..55374b116e6 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -42,6 +42,7 @@ - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; - -@@ -54,6 +55,8 @@ public static void main(String args[]) throws Throwable { - SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), - new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -index 3ed72bf0a99..8fb82ee4531 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -42,6 +42,7 @@ - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; - -@@ -54,6 +55,8 @@ public static void main(String args[]) throws Throwable { - SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), - new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -index c05cf309dae..aca32137eda 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -42,6 +42,7 @@ - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; - -@@ -54,6 +55,8 @@ public static void main(String args[]) throws Throwable { - SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), - new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -index 58ce5366bae..8deac4f7895 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -41,6 +41,7 @@ - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU; - -@@ -53,6 +54,8 @@ public static void main(String args[]) throws Throwable { - SHAOptionsBase.USE_SHA_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA_OPTION), - new UseSHASpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -index faa9fdbae67..26635002040 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -32,26 +32,27 @@ - - /** - * Generic test case for SHA-related options targeted to any CPU except -- * AArch64, PPC, S390x, SPARC and X86. -+ * AArch64, RISCV64, PPC, S390x, SPARC and X86. - */ - public class GenericTestCaseForOtherCPU extends - SHAOptionsBase.TestCase { - public GenericTestCaseForOtherCPU(String optionName) { -- // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC and X86. -+ // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, SPARC and X86. - super(optionName, new NotPredicate( - new OrPredicate(Platform::isAArch64, -+ new OrPredicate(Platform::isRISCV64, - new OrPredicate(Platform::isS390x, - new OrPredicate(Platform::isSparc, - new OrPredicate(Platform::isPPC, - new OrPredicate(Platform::isX64, -- Platform::isX86))))))); -+ Platform::isX86)))))))); - } - - @Override - protected void verifyWarnings() throws Throwable { - String shouldPassMessage = String.format("JVM should start with " - + "option '%s' without any warnings", optionName); -- // Verify that on non-x86, non-SPARC and non-AArch64 CPU usage of -+ // Verify that on non-x86, non-RISCV64, non-SPARC and non-AArch64 CPU usage of - // SHA-related options will not cause any warnings. 
- CommandLineOptionTest.verifySameJVMStartup(null, - new String[] { ".*" + optionName + ".*" }, shouldPassMessage, -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -new file mode 100644 -index 00000000000..2ecfec07a4c ---- /dev/null -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -@@ -0,0 +1,115 @@ -+/* -+ * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ */ -+ -+package compiler.intrinsics.sha.cli.testcases; -+ -+import compiler.intrinsics.sha.cli.DigestOptionsBase; -+import jdk.test.lib.process.ExitCode; -+import jdk.test.lib.Platform; -+import jdk.test.lib.cli.CommandLineOptionTest; -+import jdk.test.lib.cli.predicate.AndPredicate; -+import jdk.test.lib.cli.predicate.NotPredicate; -+ -+/** -+ * Generic test case for SHA-related options targeted to RISCV64 CPUs -+ * which don't support instruction required by the tested option. -+ */ -+public class GenericTestCaseForUnsupportedRISCV64CPU extends -+ DigestOptionsBase.TestCase { -+ -+ final private boolean checkUseSHA; -+ -+ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { -+ this(optionName, true); -+ } -+ -+ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { -+ super(optionName, new AndPredicate(Platform::isRISCV64, -+ new NotPredicate(DigestOptionsBase.getPredicateForOption( -+ optionName)))); -+ -+ this.checkUseSHA = checkUseSHA; -+ } -+ -+ @Override -+ protected void verifyWarnings() throws Throwable { -+ String shouldPassMessage = String.format("JVM startup should pass with" -+ + "option '-XX:-%s' without any warnings", optionName); -+ //Verify that option could be disabled without any warnings. 
-+ CommandLineOptionTest.verifySameJVMStartup(null, new String[] { -+ DigestOptionsBase.getWarningForUnsupportedCPU(optionName) -+ }, shouldPassMessage, shouldPassMessage, ExitCode.OK, -+ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(optionName, false)); -+ -+ if (checkUseSHA) { -+ shouldPassMessage = String.format("If JVM is started with '-XX:-" -+ + "%s' '-XX:+%s', output should contain warning.", -+ DigestOptionsBase.USE_SHA_OPTION, optionName); -+ -+ // Verify that when the tested option is enabled, then -+ // a warning will occur in VM output if UseSHA is disabled. -+ if (!optionName.equals(DigestOptionsBase.USE_SHA_OPTION)) { -+ CommandLineOptionTest.verifySameJVMStartup( -+ new String[] { DigestOptionsBase.getWarningForUnsupportedCPU(optionName) }, -+ null, -+ shouldPassMessage, -+ shouldPassMessage, -+ ExitCode.OK, -+ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(DigestOptionsBase.USE_SHA_OPTION, false), -+ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); -+ } -+ } -+ } -+ -+ @Override -+ protected void verifyOptionValues() throws Throwable { -+ // Verify that option is disabled by default. -+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be disabled by default", -+ optionName), -+ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); -+ -+ if (checkUseSHA) { -+ // Verify that option is disabled even if it was explicitly enabled -+ // using CLI options. -+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be off on unsupported " -+ + "RISCV64CPU even if set to true directly", optionName), -+ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); -+ -+ // Verify that option is disabled when +UseSHA was passed to JVM. -+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be off on unsupported " -+ + "RISCV64CPU even if %s flag set to JVM", -+ optionName, CommandLineOptionTest.prepareBooleanFlag( -+ DigestOptionsBase.USE_SHA_OPTION, true)), -+ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag( -+ DigestOptionsBase.USE_SHA_OPTION, true)); -+ } -+ } -+} -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java -index 2e3e2717a65..7be8af6d035 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java -index 0e06a9e4327..797927b42bf 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java -index c3cdbf37464..be8f7d586c2 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java -index d33bd411f16..d96d5e29c00 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions - * -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java -index 992fa4b5161..b09c873d05d 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8138583 - * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : double abs & neg test -- * @requires os.arch=="aarch64" -+ * @requires os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java -index 3e79b3528b7..fe40ed6f98d 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8138583 - * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : float abs & neg test -- * @requires os.arch=="aarch64" -+ * @requires os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java -index 6603dd224ef..51631910493 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8135028 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java -index d9a0c988004..d999ae423cf 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java -index 722db95aed3..65912a5c7fa 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : float test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java -index f58f21feb23..fffdc2f7565 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : int test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java -index 7774dabcb5f..7afe3560f30 100644 ---- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java -+++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -61,15 +61,17 @@ public class IntrinsicPredicates { - - public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE - = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), -+ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha1" }, null), - new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), - new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null), - // x86 variants - new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), -- new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); -+ new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))))); - - public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE - = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), -+ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha256" }, null), - new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha256" }, null), - new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), - new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), -@@ -79,10 +81,11 @@ public class IntrinsicPredicates { - new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), -- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); -+ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); - - public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE - = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), -+ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha512" }, null), - new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha512" }, null), - new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha512" }, null), - new OrPredicate(new 
CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), -@@ -92,7 +95,7 @@ public class IntrinsicPredicates { - new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), -- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); -+ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); - - public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE - = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE, -diff --git a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -index 57256aa5a32..d4d43b01ae6 100644 ---- a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -+++ b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -112,7 +112,7 @@ public static void main(String args[]) throws Exception { - // It's ok for ARM not to have symbols, because it does not support NMT detail - // when targeting thumb2. It's also ok for Windows not to have symbols, because - // they are only available if the symbols file is included with the build. -- if (Platform.isWindows() || Platform.isARM()) { -+ if (Platform.isWindows() || Platform.isARM() || Platform.isRISCV64()) { - return; // we are done - } - output.reportDiagnosticSummary(); -diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -index 127bb6abcd9..eab19273ad8 100644 ---- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -+++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -239,7 +239,7 @@ private static boolean isAlwaysSupportedPlatform() { - return Platform.isAix() || - (Platform.isLinux() && - (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || -- Platform.isX86())) || -+ Platform.isX86() || Platform.isRISCV64())) || - Platform.isOSX() || - Platform.isSolaris(); - } -diff --git a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java -index 54640b245f8..f0b7aed5ceb 100644 ---- a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java -+++ b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java -@@ -1,5 +1,4 @@ - /* -- * Copyright (c) 2018, Google and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -index 77458554b76..d4bfe31dd7a 100644 ---- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -+++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -@@ -45,7 +45,7 @@ - */ - public class TestMutuallyExclusivePlatformPredicates { - private static enum MethodGroup { -- ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), -+ ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), - BITNESS("is32bit", "is64bit"), - OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), - VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), -diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -index 7990c49a1f6..abeff80e5e8 100644 ---- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -+++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -54,8 +54,8 @@ public static void main(String[] args) throws Throwable { - Events.assertField(event, "hwThreads").atLeast(1); - Events.assertField(event, "cores").atLeast(1); - Events.assertField(event, "sockets").atLeast(1); -- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); -- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); -+ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); -+ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); - } - } - } -diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java -index f4ee0546c70..635cdd18054 100644 ---- a/test/lib/jdk/test/lib/Platform.java -+++ b/test/lib/jdk/test/lib/Platform.java -@@ -202,6 +202,10 @@ public static boolean isARM() { - return isArch("arm.*"); - } - -+ public static boolean isRISCV64() { -+ return isArch("riscv64"); -+ } -+ - public static boolean isPPC() { - return isArch("ppc.*"); - } - -From c51e546566c937354842a27696bd2221087101ae Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 28 Mar 2023 16:30:04 +0800 -Subject: [PATCH 002/140] Drop zgc part - ---- - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 6 +- - .../riscv/gc/z/zBarrierSetAssembler_riscv.cpp | 441 ------------------ - .../riscv/gc/z/zBarrierSetAssembler_riscv.hpp | 101 ---- - src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp | 212 --------- - src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp | 36 -- - src/hotspot/cpu/riscv/gc/z/z_riscv64.ad | 233 --------- - .../cpu/riscv/macroAssembler_riscv.cpp | 46 -- - .../cpu/riscv/macroAssembler_riscv.hpp | 9 - - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 10 - - 9 files changed, 1 insertion(+), 1093 deletions(-) - delete mode 100644 
src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp - delete mode 100644 src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp - delete mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp - delete mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp - delete mode 100644 src/hotspot/cpu/riscv/gc/z/z_riscv64.ad - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 742c2126e60..bba3bd4709c 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -871,11 +871,7 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch - if (UseCompressedOops && !wide) { - __ decode_heap_oop(dest->as_register()); - } -- -- if (!UseZGC) { -- // Load barrier has not yet been applied, so ZGC can't verify the oop here -- __ verify_oop(dest->as_register()); -- } -+ __ verify_oop(dest->as_register()); - } - } - -diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp -deleted file mode 100644 -index 3d3f4d4d774..00000000000 ---- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp -+++ /dev/null -@@ -1,441 +0,0 @@ --/* -- * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#include "precompiled.hpp" --#include "asm/macroAssembler.inline.hpp" --#include "code/codeBlob.hpp" --#include "code/vmreg.inline.hpp" --#include "gc/z/zBarrier.inline.hpp" --#include "gc/z/zBarrierSet.hpp" --#include "gc/z/zBarrierSetAssembler.hpp" --#include "gc/z/zBarrierSetRuntime.hpp" --#include "gc/z/zThreadLocalData.hpp" --#include "memory/resourceArea.hpp" --#include "runtime/sharedRuntime.hpp" --#include "utilities/macros.hpp" --#ifdef COMPILER1 --#include "c1/c1_LIRAssembler.hpp" --#include "c1/c1_MacroAssembler.hpp" --#include "gc/z/c1/zBarrierSetC1.hpp" --#endif // COMPILER1 --#ifdef COMPILER2 --#include "gc/z/c2/zBarrierSetC2.hpp" --#endif // COMPILER2 -- --#ifdef PRODUCT --#define BLOCK_COMMENT(str) /* nothing */ --#else --#define BLOCK_COMMENT(str) __ block_comment(str) --#endif -- --#undef __ --#define __ masm-> -- --void ZBarrierSetAssembler::load_at(MacroAssembler* masm, -- DecoratorSet decorators, -- BasicType type, -- Register dst, -- Address src, -- Register tmp1, -- Register tmp_thread) { -- if (!ZBarrierSet::barrier_needed(decorators, type)) { -- // Barrier not needed -- BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -- return; -- } -- -- assert_different_registers(t1, src.base()); -- assert_different_registers(t0, t1, dst); -- -- Label done; -- -- // Load bad mask into temp register. -- __ la(t0, src); -- __ ld(t1, address_bad_mask_from_thread(xthread)); -- __ ld(dst, Address(t0)); -- -- // Test reference against bad mask. If mask bad, then we need to fix it up. -- __ andr(t1, dst, t1); -- __ beqz(t1, done); -- -- __ enter(); -- -- __ push_call_clobbered_registers_except(RegSet::of(dst)); -- -- if (c_rarg0 != dst) { -- __ mv(c_rarg0, dst); -- } -- -- __ mv(c_rarg1, t0); -- -- __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); -- -- // Make sure dst has the return value. -- if (dst != x10) { -- __ mv(dst, x10); -- } -- -- __ pop_call_clobbered_registers_except(RegSet::of(dst)); -- __ leave(); -- -- __ bind(done); --} -- --#ifdef ASSERT -- --void ZBarrierSetAssembler::store_at(MacroAssembler* masm, -- DecoratorSet decorators, -- BasicType type, -- Address dst, -- Register val, -- Register tmp1, -- Register tmp2) { -- // Verify value -- if (is_reference_type(type)) { -- // Note that src could be noreg, which means we -- // are storing null and can skip verification. -- if (val != noreg) { -- Label done; -- -- // tmp1 and tmp2 are often set to noreg. -- RegSet savedRegs = RegSet::of(t0); -- __ push_reg(savedRegs, sp); -- -- __ ld(t0, address_bad_mask_from_thread(xthread)); -- __ andr(t0, val, t0); -- __ beqz(t0, done); -- __ stop("Verify oop store failed"); -- __ should_not_reach_here(); -- __ bind(done); -- __ pop_reg(savedRegs, sp); -- } -- } -- -- // Store value -- BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); --} -- --#endif // ASSERT -- --void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, -- DecoratorSet decorators, -- bool is_oop, -- Register src, -- Register dst, -- Register count, -- RegSet saved_regs) { -- if (!is_oop) { -- // Barrier not needed -- return; -- } -- -- BLOCK_COMMENT("ZBarrierSetAssembler::arraycopy_prologue {"); -- -- assert_different_registers(src, count, t0); -- -- __ push_reg(saved_regs, sp); -- -- if (count == c_rarg0 && src == c_rarg1) { -- // exactly backwards!! 
-- __ xorr(c_rarg0, c_rarg0, c_rarg1); -- __ xorr(c_rarg1, c_rarg0, c_rarg1); -- __ xorr(c_rarg0, c_rarg0, c_rarg1); -- } else { -- __ mv(c_rarg0, src); -- __ mv(c_rarg1, count); -- } -- -- __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_array_addr(), 2); -- -- __ pop_reg(saved_regs, sp); -- -- BLOCK_COMMENT("} ZBarrierSetAssembler::arraycopy_prologue"); --} -- --void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, -- Register jni_env, -- Register robj, -- Register tmp, -- Label& slowpath) { -- BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_jobject_in_native {"); -- -- assert_different_registers(jni_env, robj, tmp); -- -- // Resolve jobject -- BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, robj, tmp, slowpath); -- -- // Compute the offset of address bad mask from the field of jni_environment -- long int bad_mask_relative_offset = (long int) (in_bytes(ZThreadLocalData::address_bad_mask_offset()) - -- in_bytes(JavaThread::jni_environment_offset())); -- -- // Load the address bad mask -- __ ld(tmp, Address(jni_env, bad_mask_relative_offset)); -- -- // Check address bad mask -- __ andr(tmp, robj, tmp); -- __ bnez(tmp, slowpath); -- -- BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native"); --} -- --#ifdef COMPILER2 -- --OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { -- if (!OptoReg::is_reg(opto_reg)) { -- return OptoReg::Bad; -- } -- -- const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); -- if (vm_reg->is_FloatRegister()) { -- return opto_reg & ~1; -- } -- -- return opto_reg; --} -- --#undef __ --#define __ _masm-> -- --class ZSaveLiveRegisters { --private: -- MacroAssembler* const _masm; -- RegSet _gp_regs; -- FloatRegSet _fp_regs; -- VectorRegSet _vp_regs; -- --public: -- void initialize(ZLoadBarrierStubC2* stub) { -- // Record registers that needs to be saved/restored -- RegMaskIterator rmi(stub->live()); -- while (rmi.has_next()) { -- const OptoReg::Name opto_reg = rmi.next(); -- if (OptoReg::is_reg(opto_reg)) { -- const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); -- if (vm_reg->is_Register()) { -- _gp_regs += RegSet::of(vm_reg->as_Register()); -- } else if (vm_reg->is_FloatRegister()) { -- _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); -- } else if (vm_reg->is_VectorRegister()) { -- const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~(VectorRegisterImpl::max_slots_per_register - 1)); -- _vp_regs += VectorRegSet::of(vm_reg_base->as_VectorRegister()); -- } else { -- fatal("Unknown register type"); -- } -- } -- } -- -- // Remove C-ABI SOE registers, tmp regs and _ref register that will be updated -- _gp_regs -= RegSet::range(x18, x27) + RegSet::of(x2) + RegSet::of(x8, x9) + RegSet::of(x5, stub->ref()); -- } -- -- ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : -- _masm(masm), -- _gp_regs(), -- _fp_regs(), -- _vp_regs() { -- // Figure out what registers to save/restore -- initialize(stub); -- -- // Save registers -- __ push_reg(_gp_regs, sp); -- __ push_fp(_fp_regs, sp); -- __ push_vp(_vp_regs, sp); -- } -- -- ~ZSaveLiveRegisters() { -- // Restore registers -- __ pop_vp(_vp_regs, sp); -- __ pop_fp(_fp_regs, sp); -- __ pop_reg(_gp_regs, sp); -- } --}; -- --class ZSetupArguments { --private: -- MacroAssembler* const _masm; -- const Register _ref; -- const Address _ref_addr; -- --public: -- ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : -- _masm(masm), -- _ref(stub->ref()), -- _ref_addr(stub->ref_addr()) { -- -- // Setup 
arguments -- if (_ref_addr.base() == noreg) { -- // No self healing -- if (_ref != c_rarg0) { -- __ mv(c_rarg0, _ref); -- } -- __ mv(c_rarg1, zr); -- } else { -- // Self healing -- if (_ref == c_rarg0) { -- // _ref is already at correct place -- __ la(c_rarg1, _ref_addr); -- } else if (_ref != c_rarg1) { -- // _ref is in wrong place, but not in c_rarg1, so fix it first -- __ la(c_rarg1, _ref_addr); -- __ mv(c_rarg0, _ref); -- } else if (_ref_addr.base() != c_rarg0) { -- assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0"); -- __ mv(c_rarg0, _ref); -- __ la(c_rarg1, _ref_addr); -- } else { -- assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0"); -- if (_ref_addr.base() == c_rarg0) { -- __ mv(t1, c_rarg1); -- __ la(c_rarg1, _ref_addr); -- __ mv(c_rarg0, t1); -- } else { -- ShouldNotReachHere(); -- } -- } -- } -- } -- -- ~ZSetupArguments() { -- // Transfer result -- if (_ref != x10) { -- __ mv(_ref, x10); -- } -- } --}; -- --#undef __ --#define __ masm-> -- --void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { -- BLOCK_COMMENT("ZLoadBarrierStubC2"); -- -- // Stub entry -- __ bind(*stub->entry()); -- -- { -- ZSaveLiveRegisters save_live_registers(masm, stub); -- ZSetupArguments setup_arguments(masm, stub); -- int32_t offset = 0; -- __ la_patchable(t0, stub->slow_path(), offset); -- __ jalr(x1, t0, offset); -- } -- -- // Stub exit -- __ j(*stub->continuation()); --} -- --#undef __ -- --#endif // COMPILER2 -- --#ifdef COMPILER1 --#undef __ --#define __ ce->masm()-> -- --void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, -- LIR_Opr ref) const { -- assert_different_registers(xthread, ref->as_register(), t1); -- __ ld(t1, address_bad_mask_from_thread(xthread)); -- __ andr(t1, t1, ref->as_register()); --} -- --void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce, -- ZLoadBarrierStubC1* stub) const { -- // Stub entry -- __ bind(*stub->entry()); -- -- Register ref = stub->ref()->as_register(); -- Register ref_addr = noreg; -- Register tmp = noreg; -- -- if (stub->tmp()->is_valid()) { -- // Load address into tmp register -- ce->leal(stub->ref_addr(), stub->tmp()); -- ref_addr = tmp = stub->tmp()->as_pointer_register(); -- } else { -- // Address already in register -- ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register(); -- } -- -- assert_different_registers(ref, ref_addr, noreg); -- -- // Save x10 unless it is the result or tmp register -- // Set up SP to accomodate parameters and maybe x10. 
-- if (ref != x10 && tmp != x10) { -- __ sub(sp, sp, 32); -- __ sd(x10, Address(sp, 16)); -- } else { -- __ sub(sp, sp, 16); -- } -- -- // Setup arguments and call runtime stub -- ce->store_parameter(ref_addr, 1); -- ce->store_parameter(ref, 0); -- -- __ far_call(stub->runtime_stub()); -- -- // Verify result -- __ verify_oop(x10, "Bad oop"); -- -- -- // Move result into place -- if (ref != x10) { -- __ mv(ref, x10); -- } -- -- // Restore x10 unless it is the result or tmp register -- if (ref != x10 && tmp != x10) { -- __ ld(x10, Address(sp, 16)); -- __ add(sp, sp, 32); -- } else { -- __ add(sp, sp, 16); -- } -- -- // Stub exit -- __ j(*stub->continuation()); --} -- --#undef __ --#define __ sasm-> -- --void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, -- DecoratorSet decorators) const { -- __ prologue("zgc_load_barrier stub", false); -- -- __ push_call_clobbered_registers_except(RegSet::of(x10)); -- -- // Setup arguments -- __ load_parameter(0, c_rarg0); -- __ load_parameter(1, c_rarg1); -- -- __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); -- -- __ pop_call_clobbered_registers_except(RegSet::of(x10)); -- -- __ epilogue(); --} -- --#undef __ --#endif // COMPILER1 -diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp -deleted file mode 100644 -index dc07ab635fe..00000000000 ---- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp -+++ /dev/null -@@ -1,101 +0,0 @@ --/* -- * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#ifndef CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP --#define CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP -- --#include "code/vmreg.hpp" --#include "oops/accessDecorators.hpp" --#ifdef COMPILER2 --#include "opto/optoreg.hpp" --#endif // COMPILER2 -- --#ifdef COMPILER1 --class LIR_Assembler; --class LIR_Opr; --class StubAssembler; --class ZLoadBarrierStubC1; --#endif // COMPILER1 -- --#ifdef COMPILER2 --class Node; --class ZLoadBarrierStubC2; --#endif // COMPILER2 -- --class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { --public: -- virtual void load_at(MacroAssembler* masm, -- DecoratorSet decorators, -- BasicType type, -- Register dst, -- Address src, -- Register tmp1, -- Register tmp_thread); -- --#ifdef ASSERT -- virtual void store_at(MacroAssembler* masm, -- DecoratorSet decorators, -- BasicType type, -- Address dst, -- Register val, -- Register tmp1, -- Register tmp2); --#endif // ASSERT -- -- virtual void arraycopy_prologue(MacroAssembler* masm, -- DecoratorSet decorators, -- bool is_oop, -- Register src, -- Register dst, -- Register count, -- RegSet saved_regs); -- -- virtual void try_resolve_jobject_in_native(MacroAssembler* masm, -- Register jni_env, -- Register robj, -- Register tmp, -- Label& slowpath); -- --#ifdef COMPILER1 -- void generate_c1_load_barrier_test(LIR_Assembler* ce, -- LIR_Opr ref) const; -- -- void generate_c1_load_barrier_stub(LIR_Assembler* ce, -- ZLoadBarrierStubC1* stub) const; -- -- void generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, -- DecoratorSet decorators) const; --#endif // COMPILER1 -- --#ifdef COMPILER2 -- OptoReg::Name refine_register(const Node* node, -- OptoReg::Name opto_reg); -- -- void generate_c2_load_barrier_stub(MacroAssembler* masm, -- ZLoadBarrierStubC2* stub) const; --#endif // COMPILER2 --}; -- --#endif // CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp -deleted file mode 100644 -index d14997790af..00000000000 ---- a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp -+++ /dev/null -@@ -1,212 +0,0 @@ --/* -- * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#include "precompiled.hpp" --#include "gc/shared/gcLogPrecious.hpp" --#include "gc/shared/gc_globals.hpp" --#include "gc/z/zGlobals.hpp" --#include "runtime/globals.hpp" --#include "runtime/os.hpp" --#include "utilities/globalDefinitions.hpp" --#include "utilities/powerOfTwo.hpp" -- --#ifdef LINUX --#include --#endif // LINUX -- --// --// The heap can have three different layouts, depending on the max heap size. --// --// Address Space & Pointer Layout 1 --// -------------------------------- --// --// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) --// . . --// . . --// . . --// +--------------------------------+ 0x0000014000000000 (20TB) --// | Remapped View | --// +--------------------------------+ 0x0000010000000000 (16TB) --// . . --// +--------------------------------+ 0x00000c0000000000 (12TB) --// | Marked1 View | --// +--------------------------------+ 0x0000080000000000 (8TB) --// | Marked0 View | --// +--------------------------------+ 0x0000040000000000 (4TB) --// . . --// +--------------------------------+ 0x0000000000000000 --// --// 6 4 4 4 4 --// 3 6 5 2 1 0 --// +--------------------+----+-----------------------------------------------+ --// |00000000 00000000 00|1111|11 11111111 11111111 11111111 11111111 11111111| --// +--------------------+----+-----------------------------------------------+ --// | | | --// | | * 41-0 Object Offset (42-bits, 4TB address space) --// | | --// | * 45-42 Metadata Bits (4-bits) 0001 = Marked0 (Address view 4-8TB) --// | 0010 = Marked1 (Address view 8-12TB) --// | 0100 = Remapped (Address view 16-20TB) --// | 1000 = Finalizable (Address view N/A) --// | --// * 63-46 Fixed (18-bits, always zero) --// --// --// Address Space & Pointer Layout 2 --// -------------------------------- --// --// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) --// . . --// . . --// . . --// +--------------------------------+ 0x0000280000000000 (40TB) --// | Remapped View | --// +--------------------------------+ 0x0000200000000000 (32TB) --// . . --// +--------------------------------+ 0x0000180000000000 (24TB) --// | Marked1 View | --// +--------------------------------+ 0x0000100000000000 (16TB) --// | Marked0 View | --// +--------------------------------+ 0x0000080000000000 (8TB) --// . . --// +--------------------------------+ 0x0000000000000000 --// --// 6 4 4 4 4 --// 3 7 6 3 2 0 --// +------------------+-----+------------------------------------------------+ --// |00000000 00000000 0|1111|111 11111111 11111111 11111111 11111111 11111111| --// +-------------------+----+------------------------------------------------+ --// | | | --// | | * 42-0 Object Offset (43-bits, 8TB address space) --// | | --// | * 46-43 Metadata Bits (4-bits) 0001 = Marked0 (Address view 8-16TB) --// | 0010 = Marked1 (Address view 16-24TB) --// | 0100 = Remapped (Address view 32-40TB) --// | 1000 = Finalizable (Address view N/A) --// | --// * 63-47 Fixed (17-bits, always zero) --// --// --// Address Space & Pointer Layout 3 --// -------------------------------- --// --// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) --// . . --// . . --// . . --// +--------------------------------+ 0x0000500000000000 (80TB) --// | Remapped View | --// +--------------------------------+ 0x0000400000000000 (64TB) --// . . 
--// +--------------------------------+ 0x0000300000000000 (48TB) --// | Marked1 View | --// +--------------------------------+ 0x0000200000000000 (32TB) --// | Marked0 View | --// +--------------------------------+ 0x0000100000000000 (16TB) --// . . --// +--------------------------------+ 0x0000000000000000 --// --// 6 4 4 4 4 --// 3 8 7 4 3 0 --// +------------------+----+-------------------------------------------------+ --// |00000000 00000000 |1111|1111 11111111 11111111 11111111 11111111 11111111| --// +------------------+----+-------------------------------------------------+ --// | | | --// | | * 43-0 Object Offset (44-bits, 16TB address space) --// | | --// | * 47-44 Metadata Bits (4-bits) 0001 = Marked0 (Address view 16-32TB) --// | 0010 = Marked1 (Address view 32-48TB) --// | 0100 = Remapped (Address view 64-80TB) --// | 1000 = Finalizable (Address view N/A) --// | --// * 63-48 Fixed (16-bits, always zero) --// -- --// Default value if probing is not implemented for a certain platform: 128TB --static const size_t DEFAULT_MAX_ADDRESS_BIT = 47; --// Minimum value returned, if probing fails: 64GB --static const size_t MINIMUM_MAX_ADDRESS_BIT = 36; -- --static size_t probe_valid_max_address_bit() { --#ifdef LINUX -- size_t max_address_bit = 0; -- const size_t page_size = os::vm_page_size(); -- for (size_t i = DEFAULT_MAX_ADDRESS_BIT; i > MINIMUM_MAX_ADDRESS_BIT; --i) { -- const uintptr_t base_addr = ((uintptr_t) 1U) << i; -- if (msync((void*)base_addr, page_size, MS_ASYNC) == 0) { -- // msync suceeded, the address is valid, and maybe even already mapped. -- max_address_bit = i; -- break; -- } -- if (errno != ENOMEM) { -- // Some error occured. This should never happen, but msync -- // has some undefined behavior, hence ignore this bit. --#ifdef ASSERT -- fatal("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); --#else // ASSERT -- log_warning_p(gc)("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); --#endif // ASSERT -- continue; -- } -- // Since msync failed with ENOMEM, the page might not be mapped. -- // Try to map it, to see if the address is valid. 
-- void* const result_addr = mmap((void*) base_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); -- if (result_addr != MAP_FAILED) { -- munmap(result_addr, page_size); -- } -- if ((uintptr_t) result_addr == base_addr) { -- // address is valid -- max_address_bit = i; -- break; -- } -- } -- if (max_address_bit == 0) { -- // probing failed, allocate a very high page and take that bit as the maximum -- const uintptr_t high_addr = ((uintptr_t) 1U) << DEFAULT_MAX_ADDRESS_BIT; -- void* const result_addr = mmap((void*) high_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); -- if (result_addr != MAP_FAILED) { -- max_address_bit = BitsPerSize_t - count_leading_zeros((size_t) result_addr) - 1; -- munmap(result_addr, page_size); -- } -- } -- log_info_p(gc, init)("Probing address space for the highest valid bit: " SIZE_FORMAT, max_address_bit); -- return MAX2(max_address_bit, MINIMUM_MAX_ADDRESS_BIT); --#else // LINUX -- return DEFAULT_MAX_ADDRESS_BIT; --#endif // LINUX --} -- --size_t ZPlatformAddressOffsetBits() { -- const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1; -- const size_t max_address_offset_bits = valid_max_address_offset_bits - 3; -- const size_t min_address_offset_bits = max_address_offset_bits - 2; -- const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio); -- const size_t address_offset_bits = log2i_exact(address_offset); -- return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); --} -- --size_t ZPlatformAddressMetadataShift() { -- return ZPlatformAddressOffsetBits(); --} -diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp -deleted file mode 100644 -index f20ecd9b073..00000000000 ---- a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp -+++ /dev/null -@@ -1,36 +0,0 @@ --/* -- * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#ifndef CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP --#define CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP -- --const size_t ZPlatformGranuleSizeShift = 21; // 2MB --const size_t ZPlatformHeapViews = 3; --const size_t ZPlatformCacheLineSize = 64; -- --size_t ZPlatformAddressOffsetBits(); --size_t ZPlatformAddressMetadataShift(); -- --#endif // CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad -deleted file mode 100644 -index 6b6f87814a5..00000000000 ---- a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad -+++ /dev/null -@@ -1,233 +0,0 @@ --// --// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. --// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. --// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. --// --// This code is free software; you can redistribute it and/or modify it --// under the terms of the GNU General Public License version 2 only, as --// published by the Free Software Foundation. --// --// This code is distributed in the hope that it will be useful, but WITHOUT --// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or --// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --// version 2 for more details (a copy is included in the LICENSE file that --// accompanied this code). --// --// You should have received a copy of the GNU General Public License version --// 2 along with this work; if not, write to the Free Software Foundation, --// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. --// --// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA --// or visit www.oracle.com if you need additional information or have any --// questions. 
--// -- --source_hpp %{ -- --#include "gc/shared/gc_globals.hpp" --#include "gc/z/c2/zBarrierSetC2.hpp" --#include "gc/z/zThreadLocalData.hpp" -- --%} -- --source %{ -- --static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, int barrier_data) { -- if (barrier_data == ZLoadBarrierElided) { -- return; -- } -- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data); -- __ ld(tmp, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(tmp, tmp, ref); -- __ bnez(tmp, *stub->entry(), true /* far */); -- __ bind(*stub->continuation()); --} -- --static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { -- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, ZLoadBarrierStrong); -- __ j(*stub->entry()); -- __ bind(*stub->continuation()); --} -- --%} -- --// Load Pointer --instruct zLoadP(iRegPNoSp dst, memory mem) --%{ -- match(Set dst (LoadP mem)); -- predicate(UseZGC && (n->as_Load()->barrier_data() != 0)); -- effect(TEMP dst); -- -- ins_cost(4 * DEFAULT_COST); -- -- format %{ "ld $dst, $mem, #@zLoadP" %} -- -- ins_encode %{ -- const Address ref_addr (as_Register($mem$$base), $mem$$disp); -- __ ld($dst$$Register, ref_addr); -- z_load_barrier(_masm, this, ref_addr, $dst$$Register, t0 /* tmp */, barrier_data()); -- %} -- -- ins_pipe(iload_reg_mem); --%} -- --instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -- match(Set res (CompareAndSwapP mem (Binary oldval newval))); -- match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -- predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(KILL cr, TEMP_DEF res); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapP\n\t" -- "mv $res, $res == $oldval" %} -- -- ins_encode %{ -- Label failed; -- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -- true /* result_as_bool */); -- __ beqz($res$$Register, failed); -- __ mv(t0, $oldval$$Register); -- __ bind(failed); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); -- __ andr(t1, t1, t0); -- __ beqz(t1, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -- true /* result_as_bool */); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -- match(Set res (CompareAndSwapP mem (Binary oldval newval))); -- match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -- predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); -- effect(KILL cr, TEMP_DEF res); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapPAcq\n\t" -- "mv $res, $res == $oldval" %} -- -- ins_encode %{ -- 
Label failed; -- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -- true /* result_as_bool */); -- __ beqz($res$$Register, failed); -- __ mv(t0, $oldval$$Register); -- __ bind(failed); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); -- __ andr(t1, t1, t0); -- __ beqz(t1, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -- true /* result_as_bool */); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ -- match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -- predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(TEMP_DEF res); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangeP" %} -- -- ins_encode %{ -- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(t0, t0, $res$$Register); -- __ beqz(t0, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ -- match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -- predicate(UseZGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(TEMP_DEF res); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangePAcq" %} -- -- ins_encode %{ -- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(t0, t0, $res$$Register); -- __ beqz(t0, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -- match(Set prev (GetAndSetP mem newv)); -- predicate(UseZGC && !needs_acquiring_load_reserved(n) && 
n->as_LoadStore()->barrier_data() != 0); -- effect(TEMP_DEF prev, KILL cr); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "atomic_xchg $prev, $newv, [$mem], #@zGetAndSetP" %} -- -- ins_encode %{ -- __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); -- z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); -- %} -- -- ins_pipe(pipe_serial); --%} -- --instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -- match(Set prev (GetAndSetP mem newv)); -- predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() != 0)); -- effect(TEMP_DEF prev, KILL cr); -- -- ins_cost(VOLATILE_REF_COST); -- -- format %{ "atomic_xchg_acq $prev, $newv, [$mem], #@zGetAndSetPAcq" %} -- -- ins_encode %{ -- __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); -- z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); -- %} -- ins_pipe(pipe_serial); --%} -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 86710295444..9d2cc4cf89f 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1046,52 +1046,6 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { - return count; - } - --#ifdef COMPILER2 --int MacroAssembler::push_vp(unsigned int bitset, Register stack) { -- CompressibleRegion cr(this); -- int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); -- -- // Scan bitset to accumulate register pairs -- unsigned char regs[32]; -- int count = 0; -- for (int reg = 31; reg >= 0; reg--) { -- if ((1U << 31) & bitset) { -- regs[count++] = reg; -- } -- bitset <<= 1; -- } -- -- for (int i = 0; i < count; i++) { -- sub(stack, stack, vector_size_in_bytes); -- vs1r_v(as_VectorRegister(regs[i]), stack); -- } -- -- return count * vector_size_in_bytes / wordSize; --} -- --int MacroAssembler::pop_vp(unsigned int bitset, Register stack) { -- CompressibleRegion cr(this); -- int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); -- -- // Scan bitset to accumulate register pairs -- unsigned char regs[32]; -- int count = 0; -- for (int reg = 31; reg >= 0; reg--) { -- if ((1U << 31) & bitset) { -- regs[count++] = reg; -- } -- bitset <<= 1; -- } -- -- for (int i = count - 1; i >= 0; i--) { -- vl1r_v(as_VectorRegister(regs[i]), stack); -- add(stack, stack, vector_size_in_bytes); -- } -- -- return count * vector_size_in_bytes / wordSize; --} --#endif // COMPILER2 -- - void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { - CompressibleRegion cr(this); - // Push integer registers x7, x10-x17, x28-x31. 
-diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 23e09475be1..b2f0455a1f1 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -484,12 +484,6 @@ class MacroAssembler: public Assembler { - void pop_reg(Register Rd); - int push_reg(unsigned int bitset, Register stack); - int pop_reg(unsigned int bitset, Register stack); -- void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } -- void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } --#ifdef COMPILER2 -- void push_vp(VectorRegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); } -- void pop_vp(VectorRegSet regs, Register stack) { if (regs.bits()) pop_vp(regs.bits(), stack); } --#endif // COMPILER2 - - // Push and pop everything that might be clobbered by a native - // runtime call except t0 and t1. (They are always -@@ -783,9 +777,6 @@ class MacroAssembler: public Assembler { - int push_fp(unsigned int bitset, Register stack); - int pop_fp(unsigned int bitset, Register stack); - -- int push_vp(unsigned int bitset, Register stack); -- int pop_vp(unsigned int bitset, Register stack); -- - // vext - void vmnot_m(VectorRegister vd, VectorRegister vs); - void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -index b3fdd04db1b..b05edf7172c 100644 ---- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -546,16 +546,6 @@ class StubGenerator: public StubCodeGenerator { - // make sure object is 'reasonable' - __ beqz(x10, exit); // if obj is NULL it is OK - --#if INCLUDE_ZGC -- if (UseZGC) { -- // Check if mask is good. 
-- // verifies that ZAddressBadMask & x10 == 0 -- __ ld(c_rarg3, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(c_rarg2, x10, c_rarg3); -- __ bnez(c_rarg2, error); -- } --#endif -- - // Check if the oop is in the right area of memory - __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); - __ andr(c_rarg2, x10, c_rarg3); - -From 7772140df96747b42b13007d0827fc21d2a8b926 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Mon, 27 Mar 2023 15:43:39 +0800 -Subject: [PATCH 003/140] Drop the C2 Vector part - ---- - make/hotspot/gensrc/GensrcAdlc.gmk | 1 - - .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 325 --- - .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 52 - - src/hotspot/cpu/riscv/globals_riscv.hpp | 8 +- - .../cpu/riscv/macroAssembler_riscv.cpp | 22 +- - .../cpu/riscv/macroAssembler_riscv.hpp | 4 +- - src/hotspot/cpu/riscv/matcher_riscv.hpp | 44 +- - src/hotspot/cpu/riscv/register_riscv.cpp | 5 - - src/hotspot/cpu/riscv/register_riscv.hpp | 4 +- - src/hotspot/cpu/riscv/riscv.ad | 476 +--- - src/hotspot/cpu/riscv/riscv_v.ad | 2065 ----------------- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 61 +- - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 110 - - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 4 - - src/hotspot/cpu/riscv/vmreg_riscv.cpp | 10 +- - src/hotspot/cpu/riscv/vmreg_riscv.hpp | 17 +- - 16 files changed, 41 insertions(+), 3167 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/riscv_v.ad - -diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk -index 67f4c6f0574..51137b99db2 100644 ---- a/make/hotspot/gensrc/GensrcAdlc.gmk -+++ b/make/hotspot/gensrc/GensrcAdlc.gmk -@@ -152,7 +152,6 @@ ifeq ($(call check-jvm-feature, compiler2), true) - - ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) - AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ -- $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_v.ad \ - $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_b.ad \ - ))) - endif -diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp -index 27770dc17aa..73f84a724ca 100644 ---- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp -@@ -1319,328 +1319,3 @@ void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRe - - bind(Done); - } -- --void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, -- VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { -- Label loop; -- Assembler::SEW sew = islatin ? 
Assembler::e8 : Assembler::e16; -- -- bind(loop); -- vsetvli(tmp1, cnt, sew, Assembler::m2); -- vlex_v(vr1, a1, sew); -- vlex_v(vr2, a2, sew); -- vmsne_vv(vrs, vr1, vr2); -- vfirst_m(tmp2, vrs); -- bgez(tmp2, DONE); -- sub(cnt, cnt, tmp1); -- if (!islatin) { -- slli(tmp1, tmp1, 1); // get byte counts -- } -- add(a1, a1, tmp1); -- add(a2, a2, tmp1); -- bnez(cnt, loop); -- -- mv(result, true); --} -- --void C2_MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { -- Label DONE; -- Register tmp1 = t0; -- Register tmp2 = t1; -- -- BLOCK_COMMENT("string_equals_v {"); -- -- mv(result, false); -- -- if (elem_size == 2) { -- srli(cnt, cnt, 1); -- } -- -- element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); -- -- bind(DONE); -- BLOCK_COMMENT("} string_equals_v"); --} -- --// used by C2 ClearArray patterns. --// base: Address of a buffer to be zeroed --// cnt: Count in HeapWords --// --// base, cnt, v0, v1 and t0 are clobbered. --void C2_MacroAssembler::clear_array_v(Register base, Register cnt) { -- Label loop; -- -- // making zero words -- vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -- vxor_vv(v0, v0, v0); -- -- bind(loop); -- vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -- vse64_v(v0, base); -- sub(cnt, cnt, t0); -- shadd(base, t0, base, t0, 3); -- bnez(cnt, loop); --} -- --void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, -- Register cnt1, int elem_size) { -- Label DONE; -- Register tmp1 = t0; -- Register tmp2 = t1; -- Register cnt2 = tmp2; -- int length_offset = arrayOopDesc::length_offset_in_bytes(); -- int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); -- -- BLOCK_COMMENT("arrays_equals_v {"); -- -- // if (a1 == a2), return true -- mv(result, true); -- beq(a1, a2, DONE); -- -- mv(result, false); -- // if a1 == null or a2 == null, return false -- beqz(a1, DONE); -- beqz(a2, DONE); -- // if (a1.length != a2.length), return false -- lwu(cnt1, Address(a1, length_offset)); -- lwu(cnt2, Address(a2, length_offset)); -- bne(cnt1, cnt2, DONE); -- -- la(a1, Address(a1, base_offset)); -- la(a2, Address(a2, base_offset)); -- -- element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); -- -- bind(DONE); -- -- BLOCK_COMMENT("} arrays_equals_v"); --} -- --void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, -- Register result, Register tmp1, Register tmp2, int encForm) { -- Label DIFFERENCE, DONE, L, loop; -- bool encLL = encForm == StrIntrinsicNode::LL; -- bool encLU = encForm == StrIntrinsicNode::LU; -- bool encUL = encForm == StrIntrinsicNode::UL; -- -- bool str1_isL = encLL || encLU; -- bool str2_isL = encLL || encUL; -- -- int minCharsInWord = encLL ? wordSize : wordSize / 2; -- -- BLOCK_COMMENT("string_compare {"); -- -- // for Lating strings, 1 byte for 1 character -- // for UTF16 strings, 2 bytes for 1 character -- if (!str1_isL) -- sraiw(cnt1, cnt1, 1); -- if (!str2_isL) -- sraiw(cnt2, cnt2, 1); -- -- // if str1 == str2, return the difference -- // save the minimum of the string lengths in cnt2. -- sub(result, cnt1, cnt2); -- bgt(cnt1, cnt2, L); -- mv(cnt2, cnt1); -- bind(L); -- -- if (str1_isL == str2_isL) { // LL or UU -- element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); -- j(DONE); -- } else { // LU or UL -- Register strL = encLU ? str1 : str2; -- Register strU = encLU ? str2 : str1; -- VectorRegister vstr1 = encLU ? 
v4 : v0; -- VectorRegister vstr2 = encLU ? v0 : v4; -- -- bind(loop); -- vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); -- vle8_v(vstr1, strL); -- vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); -- vzext_vf2(vstr2, vstr1); -- vle16_v(vstr1, strU); -- vmsne_vv(v0, vstr2, vstr1); -- vfirst_m(tmp2, v0); -- bgez(tmp2, DIFFERENCE); -- sub(cnt2, cnt2, tmp1); -- add(strL, strL, tmp1); -- shadd(strU, tmp1, strU, tmp1, 1); -- bnez(cnt2, loop); -- j(DONE); -- } -- bind(DIFFERENCE); -- slli(tmp1, tmp2, 1); -- add(str1, str1, str1_isL ? tmp2 : tmp1); -- add(str2, str2, str2_isL ? tmp2 : tmp1); -- str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); -- str2_isL ? lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); -- sub(result, tmp1, tmp2); -- -- bind(DONE); --} -- --void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { -- Label loop; -- assert_different_registers(src, dst, len, tmp, t0); -- -- BLOCK_COMMENT("byte_array_inflate_v {"); -- bind(loop); -- vsetvli(tmp, len, Assembler::e8, Assembler::m2); -- vle8_v(v2, src); -- vsetvli(t0, len, Assembler::e16, Assembler::m4); -- vzext_vf2(v0, v2); -- vse16_v(v0, dst); -- sub(len, len, tmp); -- add(src, src, tmp); -- shadd(dst, tmp, dst, tmp, 1); -- bnez(len, loop); -- BLOCK_COMMENT("} byte_array_inflate_v"); --} -- --// Compress char[] array to byte[]. --// result: the array length if every element in array can be encoded; 0, otherwise. --void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { -- Label done; -- encode_iso_array_v(src, dst, len, result, tmp); -- beqz(len, done); -- mv(result, zr); -- bind(done); --} -- --// result: the number of elements had been encoded. --void C2_MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { -- Label loop, DIFFERENCE, DONE; -- -- BLOCK_COMMENT("encode_iso_array_v {"); -- mv(result, 0); -- -- bind(loop); -- mv(tmp, 0xff); -- vsetvli(t0, len, Assembler::e16, Assembler::m2); -- vle16_v(v2, src); -- // if element > 0xff, stop -- vmsgtu_vx(v1, v2, tmp); -- vfirst_m(tmp, v1); -- vmsbf_m(v0, v1); -- // compress char to byte -- vsetvli(t0, len, Assembler::e8); -- vncvt_x_x_w(v1, v2, Assembler::v0_t); -- vse8_v(v1, dst, Assembler::v0_t); -- -- bgez(tmp, DIFFERENCE); -- add(result, result, t0); -- add(dst, dst, t0); -- sub(len, len, t0); -- shadd(src, t0, src, t0, 1); -- bnez(len, loop); -- j(DONE); -- -- bind(DIFFERENCE); -- add(result, result, tmp); -- -- bind(DONE); -- BLOCK_COMMENT("} encode_iso_array_v"); --} -- --void C2_MacroAssembler::count_positives_v(Register ary, Register len, Register result, Register tmp) { -- Label LOOP, SET_RESULT, DONE; -- -- BLOCK_COMMENT("count_positives_v {"); -- mv(result, zr); -- -- bind(LOOP); -- vsetvli(t0, len, Assembler::e8, Assembler::m4); -- vle8_v(v0, ary); -- vmslt_vx(v0, v0, zr); -- vfirst_m(tmp, v0); -- bgez(tmp, SET_RESULT); -- // if tmp == -1, all bytes are positive -- add(result, result, t0); -- -- sub(len, len, t0); -- add(ary, ary, t0); -- bnez(len, LOOP); -- j(DONE); -- -- // add remaining positive bytes count -- bind(SET_RESULT); -- add(result, result, tmp); -- -- bind(DONE); -- BLOCK_COMMENT("} count_positives_v"); --} -- --void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1, -- Register ch, Register result, -- Register tmp1, Register tmp2, -- bool isL) { -- mv(result, zr); -- -- Label loop, MATCH, DONE; -- Assembler::SEW sew = isL ? 
Assembler::e8 : Assembler::e16; -- bind(loop); -- vsetvli(tmp1, cnt1, sew, Assembler::m4); -- vlex_v(v0, str1, sew); -- vmseq_vx(v0, v0, ch); -- vfirst_m(tmp2, v0); -- bgez(tmp2, MATCH); // if equal, return index -- -- add(result, result, tmp1); -- sub(cnt1, cnt1, tmp1); -- if (!isL) slli(tmp1, tmp1, 1); -- add(str1, str1, tmp1); -- bnez(cnt1, loop); -- -- mv(result, -1); -- j(DONE); -- -- bind(MATCH); -- add(result, result, tmp2); -- -- bind(DONE); --} -- --// Set dst to NaN if any NaN input. --void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, -- bool is_double, bool is_min) { -- assert_different_registers(dst, src1, src2); -- -- vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); -- -- is_min ? vfmin_vv(dst, src1, src2) -- : vfmax_vv(dst, src1, src2); -- -- vmfne_vv(v0, src1, src1); -- vfadd_vv(dst, src1, src1, Assembler::v0_t); -- vmfne_vv(v0, src2, src2); -- vfadd_vv(dst, src2, src2, Assembler::v0_t); --} -- --// Set dst to NaN if any NaN input. --void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst, -- FloatRegister src1, VectorRegister src2, -- VectorRegister tmp1, VectorRegister tmp2, -- bool is_double, bool is_min) { -- assert_different_registers(src2, tmp1, tmp2); -- -- Label L_done, L_NaN; -- vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); -- vfmv_s_f(tmp2, src1); -- -- is_min ? vfredmin_vs(tmp1, src2, tmp2) -- : vfredmax_vs(tmp1, src2, tmp2); -- -- fsflags(zr); -- // Checking NaNs -- vmflt_vf(tmp2, src2, src1); -- frflags(t0); -- bnez(t0, L_NaN); -- j(L_done); -- -- bind(L_NaN); -- vfmv_s_f(tmp2, src1); -- vfredsum_vs(tmp1, src2, tmp2); -- -- bind(L_done); -- vfmv_f_s(dst, tmp1); --} -diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp -index c71df4c101b..90b6554af02 100644 ---- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp -@@ -28,13 +28,6 @@ - - // C2_MacroAssembler contains high-level macros for C2 - -- private: -- void element_compare(Register r1, Register r2, -- Register result, Register cnt, -- Register tmp1, Register tmp2, -- VectorRegister vr1, VectorRegister vr2, -- VectorRegister vrs, -- bool is_latin, Label& DONE); - public: - - void string_compare(Register str1, Register str2, -@@ -145,49 +138,4 @@ - FloatRegister src1, FloatRegister src2, - bool is_double, bool is_min); - -- // intrinsic methods implemented by rvv instructions -- void string_equals_v(Register r1, Register r2, -- Register result, Register cnt1, -- int elem_size); -- -- void arrays_equals_v(Register r1, Register r2, -- Register result, Register cnt1, -- int elem_size); -- -- void string_compare_v(Register str1, Register str2, -- Register cnt1, Register cnt2, -- Register result, -- Register tmp1, Register tmp2, -- int encForm); -- -- void clear_array_v(Register base, Register cnt); -- -- void byte_array_inflate_v(Register src, Register dst, -- Register len, Register tmp); -- -- void char_array_compress_v(Register src, Register dst, -- Register len, Register result, -- Register tmp); -- -- void encode_iso_array_v(Register src, Register dst, -- Register len, Register result, -- Register tmp); -- -- void count_positives_v(Register ary, Register len, -- Register result, Register tmp); -- -- void string_indexof_char_v(Register str1, Register cnt1, -- Register ch, Register result, -- Register tmp1, Register tmp2, -- bool isL); -- -- void minmax_FD_v(VectorRegister dst, -- VectorRegister src1, 
VectorRegister src2, -- bool is_double, bool is_min); -- -- void reduce_minmax_FD_v(FloatRegister dst, -- FloatRegister src1, VectorRegister src2, -- VectorRegister tmp1, VectorRegister tmp2, -- bool is_double, bool is_min); -- - #endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index cbfc0583883..845064d6cbc 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -90,10 +90,8 @@ define_pd_global(intx, InlineSmallCode, 1000); - "Extend fence.i to fence.i + fence.") \ - product(bool, AvoidUnalignedAccesses, true, \ - "Avoid generating unaligned memory accesses") \ -- product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \ -- product(bool, UseRVB, false, EXPERIMENTAL, "Use RVB instructions") \ -- product(bool, UseRVC, false, EXPERIMENTAL, "Use RVC instructions") \ -- product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \ -- "Use RVV instructions for left/right shift of BigInteger") -+ experimental(bool, UseRVV, false, "Use RVV instructions") \ -+ experimental(bool, UseRVB, false, "Use RVB instructions") \ -+ experimental(bool, UseRVC, false, "Use RVC instructions") - - #endif // CPU_RISCV_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 9d2cc4cf89f..8b8d126f6c9 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1086,7 +1086,7 @@ void MacroAssembler::popa() { - pop_reg(0xffffffe2, sp); - } - --void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { -+void MacroAssembler::push_CPU_state() { - CompressibleRegion cr(this); - // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) - push_reg(0xffffffe0, sp); -@@ -1096,28 +1096,10 @@ void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) - for (int i = 0; i < 32; i++) { - fsd(as_FloatRegister(i), Address(sp, i * wordSize)); - } -- -- // vector registers -- if (save_vectors) { -- sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers); -- vsetvli(t0, x0, Assembler::e64, Assembler::m8); -- for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -- add(t0, sp, vector_size_in_bytes * i); -- vse64_v(as_VectorRegister(i), t0); -- } -- } - } - --void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { -+void MacroAssembler::pop_CPU_state() { - CompressibleRegion cr(this); -- // vector registers -- if (restore_vectors) { -- vsetvli(t0, x0, Assembler::e64, Assembler::m8); -- for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -- vle64_v(as_VectorRegister(i), sp); -- add(sp, sp, vector_size_in_bytes * 8); -- } -- } - - // float registers - for (int i = 0; i < 32; i++) { -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index b2f0455a1f1..b43131514c1 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -501,8 +501,8 @@ class MacroAssembler: public Assembler { - - void pusha(); - void popa(); -- void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0); -- void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0); -+ void push_CPU_state(); -+ void pop_CPU_state(); - - // if heap base register is used - reinit it with the correct value - 
void reinit_heapbase(); -diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp -index 23a75d20502..4c7fabd7240 100644 ---- a/src/hotspot/cpu/riscv/matcher_riscv.hpp -+++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp -@@ -31,16 +31,9 @@ - // false => size gets scaled to BytesPerLong, ok. - static const bool init_array_count_is_in_bytes = false; - -- // Whether this platform implements the scalable vector feature -- static const bool implements_scalable_vector = true; -- -- static const bool supports_scalable_vector() { -- return UseRVV; -- } -- -- // riscv supports misaligned vectors store/load. -+ // riscv doesn't support misaligned vectors store/load on JDK11. - static constexpr bool misaligned_vectors_ok() { -- return true; -+ return false; - } - - // Whether code generation need accurate ConvI2L types. -@@ -53,9 +46,6 @@ - // the cpu only look at the lower 5/6 bits anyway? - static const bool need_masked_shift_count = false; - -- // No support for generic vector operands. -- static const bool supports_generic_vector_operands = false; -- - static constexpr bool isSimpleConstant64(jlong value) { - // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. - // Probably always true, even if a temp register is required. -@@ -127,31 +117,6 @@ - // the relevant 32 bits. - static const bool int_in_long = true; - -- // Does the CPU supports vector variable shift instructions? -- static constexpr bool supports_vector_variable_shifts(void) { -- return false; -- } -- -- // Does the CPU supports vector variable rotate instructions? -- static constexpr bool supports_vector_variable_rotates(void) { -- return false; -- } -- -- // Does the CPU supports vector constant rotate instructions? -- static constexpr bool supports_vector_constant_rotates(int shift) { -- return false; -- } -- -- // Does the CPU supports vector unsigned comparison instructions? -- static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { -- return false; -- } -- -- // Some microarchitectures have mask registers used on vectors -- static const bool has_predicated_vectors(void) { -- return false; -- } -- - // true means we have fast l2f convers - // false means that conversion is done by runtime call - static constexpr bool convL2FSupported(void) { -@@ -161,9 +126,4 @@ - // Implements a variant of EncodeISOArrayNode that encode ASCII only - static const bool supports_encode_ascii_array = false; - -- // Returns pre-selection estimated size of a vector operation. 
-- static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) { -- return 0; -- } -- - #endif // CPU_RISCV_MATCHER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp -index f8116e9df8c..96cf1996a83 100644 ---- a/src/hotspot/cpu/riscv/register_riscv.cpp -+++ b/src/hotspot/cpu/riscv/register_riscv.cpp -@@ -37,11 +37,6 @@ const int ConcreteRegisterImpl::max_fpr = - ConcreteRegisterImpl::max_gpr + - FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; - --const int ConcreteRegisterImpl::max_vpr = -- ConcreteRegisterImpl::max_fpr + -- VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; -- -- - const char* RegisterImpl::name() const { - static const char *const names[number_of_registers] = { - "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", -diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp -index a9200cac647..d697751f55f 100644 ---- a/src/hotspot/cpu/riscv/register_riscv.hpp -+++ b/src/hotspot/cpu/riscv/register_riscv.hpp -@@ -307,14 +307,12 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { - // it's optoregs. - - number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + -- FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + -- VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers) -+ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers) - }; - - // added to make it compile - static const int max_gpr; - static const int max_fpr; -- static const int max_vpr; - }; - - typedef AbstractRegSet RegSet; -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 588887e1d96..85593a942e9 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -226,177 +226,6 @@ reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() ); - reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() ); - reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); - --// ---------------------------- --// Vector Registers --// ---------------------------- -- --// For RVV vector registers, we simply extend vector register size to 4 --// 'logical' slots. This is nominally 128 bits but it actually covers --// all possible 'physical' RVV vector register lengths from 128 ~ 1024 --// bits. The 'physical' RVV vector register length is detected during --// startup, so the register allocator is able to identify the correct --// number of bytes needed for an RVV spill/unspill. 
-- --reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() ); --reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() ); --reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) ); --reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) ); -- --reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() ); --reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() ); --reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) ); --reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) ); -- --reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() ); --reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() ); --reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) ); --reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) ); -- --reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() ); --reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() ); --reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) ); --reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) ); -- --reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() ); --reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() ); --reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) ); --reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) ); -- --reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() ); --reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() ); --reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) ); --reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) ); -- --reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() ); --reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() ); --reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) ); --reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) ); -- --reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() ); --reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() ); --reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) ); --reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) ); -- --reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() ); --reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() ); --reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) ); --reg_def V8_K ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(3) ); -- --reg_def V9 ( SOC, SOC, Op_VecA, 9, v9->as_VMReg() ); --reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() ); --reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) ); --reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) ); -- --reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() ); --reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() ); --reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) ); --reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) ); -- --reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() ); --reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() ); --reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) ); --reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) ); -- --reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() ); --reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() ); --reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) ); --reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) ); -- --reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() ); --reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() ); --reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) ); 
--reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) ); -- --reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() ); --reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() ); --reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) ); --reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) ); -- --reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() ); --reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() ); --reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) ); --reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) ); -- --reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() ); --reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() ); --reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) ); --reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) ); -- --reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() ); --reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() ); --reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) ); --reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) ); -- --reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() ); --reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() ); --reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) ); --reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) ); -- --reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() ); --reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() ); --reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) ); --reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) ); -- --reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() ); --reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() ); --reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) ); --reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) ); -- --reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() ); --reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() ); --reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) ); --reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) ); -- --reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() ); --reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() ); --reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) ); --reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) ); -- --reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() ); --reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() ); --reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) ); --reg_def V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) ); -- --reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() ); --reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() ); --reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) ); --reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) ); -- --reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() ); --reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() ); --reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) ); --reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) ); -- --reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() ); --reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() ); --reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) ); --reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) ); -- --reg_def 
V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() ); --reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() ); --reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) ); --reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) ); -- --reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() ); --reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() ); --reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) ); --reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) ); -- --reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() ); --reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() ); --reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) ); --reg_def V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) ); -- --reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() ); --reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() ); --reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) ); --reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) ); -- --reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() ); --reg_def V31_H ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next() ); --reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) ); --reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) ); -- - // ---------------------------- - // Special Registers - // ---------------------------- -@@ -495,42 +324,7 @@ alloc_class chunk1( - F27, F27_H, - ); - --alloc_class chunk2( -- V0, V0_H, V0_J, V0_K, -- V1, V1_H, V1_J, V1_K, -- V2, V2_H, V2_J, V2_K, -- V3, V3_H, V3_J, V3_K, -- V4, V4_H, V4_J, V4_K, -- V5, V5_H, V5_J, V5_K, -- V6, V6_H, V6_J, V6_K, -- V7, V7_H, V7_J, V7_K, -- V8, V8_H, V8_J, V8_K, -- V9, V9_H, V9_J, V9_K, -- V10, V10_H, V10_J, V10_K, -- V11, V11_H, V11_J, V11_K, -- V12, V12_H, V12_J, V12_K, -- V13, V13_H, V13_J, V13_K, -- V14, V14_H, V14_J, V14_K, -- V15, V15_H, V15_J, V15_K, -- V16, V16_H, V16_J, V16_K, -- V17, V17_H, V17_J, V17_K, -- V18, V18_H, V18_J, V18_K, -- V19, V19_H, V19_J, V19_K, -- V20, V20_H, V20_J, V20_K, -- V21, V21_H, V21_J, V21_K, -- V22, V22_H, V22_J, V22_K, -- V23, V23_H, V23_J, V23_K, -- V24, V24_H, V24_J, V24_K, -- V25, V25_H, V25_J, V25_K, -- V26, V26_H, V26_J, V26_K, -- V27, V27_H, V27_J, V27_K, -- V28, V28_H, V28_J, V28_K, -- V29, V29_H, V29_J, V29_K, -- V30, V30_H, V30_J, V30_K, -- V31, V31_H, V31_J, V31_K, --); -- --alloc_class chunk3(RFLAGS); -+alloc_class chunk2(RFLAGS); - - //----------Architecture Description Register Classes-------------------------- - // Several register classes are automatically defined based upon information in -@@ -826,41 +620,6 @@ reg_class double_reg( - F31, F31_H - ); - --// Class for all RVV vector registers --reg_class vectora_reg( -- V1, V1_H, V1_J, V1_K, -- V2, V2_H, V2_J, V2_K, -- V3, V3_H, V3_J, V3_K, -- V4, V4_H, V4_J, V4_K, -- V5, V5_H, V5_J, V5_K, -- V6, V6_H, V6_J, V6_K, -- V7, V7_H, V7_J, V7_K, -- V8, V8_H, V8_J, V8_K, -- V9, V9_H, V9_J, V9_K, -- V10, V10_H, V10_J, V10_K, -- V11, V11_H, V11_J, V11_K, -- V12, V12_H, V12_J, V12_K, -- V13, V13_H, V13_J, V13_K, -- V14, V14_H, V14_J, V14_K, -- V15, V15_H, V15_J, V15_K, -- V16, V16_H, V16_J, V16_K, -- V17, V17_H, V17_J, V17_K, -- V18, V18_H, V18_J, V18_K, -- V19, V19_H, V19_J, V19_K, -- V20, V20_H, V20_J, V20_K, -- V21, V21_H, V21_J, V21_K, -- V22, V22_H, V22_J, V22_K, -- V23, V23_H, V23_J, V23_K, -- V24, V24_H, V24_J, V24_K, -- V25, V25_H, V25_J, V25_K, -- V26, V26_H, V26_J, V26_K, -- V27, V27_H, V27_J, V27_K, -- V28, V28_H, V28_J, V28_K, -- V29, V29_H, V29_J, 
V29_K, -- V30, V30_H, V30_J, V30_K, -- V31, V31_H, V31_J, V31_K --); -- - // Class for 64 bit register f0 - reg_class f0_reg( - F0, F0_H -@@ -881,31 +640,6 @@ reg_class f3_reg( - F3, F3_H - ); - --// class for vector register v1 --reg_class v1_reg( -- V1, V1_H, V1_J, V1_K --); -- --// class for vector register v2 --reg_class v2_reg( -- V2, V2_H, V2_J, V2_K --); -- --// class for vector register v3 --reg_class v3_reg( -- V3, V3_H, V3_J, V3_K --); -- --// class for vector register v4 --reg_class v4_reg( -- V4, V4_H, V4_J, V4_K --); -- --// class for vector register v5 --reg_class v5_reg( -- V5, V5_H, V5_J, V5_K --); -- - // class for condition codes - reg_class reg_flags(RFLAGS); - %} -@@ -1447,7 +1181,7 @@ const Pipeline * MachEpilogNode::pipeline() const { - - // Figure out which register class each belongs in: rc_int, rc_float or - // rc_stack. --enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack }; -+enum RC { rc_bad, rc_int, rc_float, rc_stack }; - - static enum RC rc_class(OptoReg::Name reg) { - -@@ -1468,13 +1202,7 @@ static enum RC rc_class(OptoReg::Name reg) { - return rc_float; - } - -- // we have 32 vector register * 4 halves -- int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers; -- if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) { -- return rc_vector; -- } -- -- // Between vector regs & stack is the flags regs. -+ // Between float regs & stack is the flags regs. - assert(OptoReg::is_stack(reg), "blow up if spilling flags"); - - return rc_stack; -@@ -1512,30 +1240,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo - int src_offset = ra_->reg2offset(src_lo); - int dst_offset = ra_->reg2offset(dst_lo); - -- if (bottom_type()->isa_vect() != NULL) { -- uint ireg = ideal_reg(); -- if (ireg == Op_VecA && cbuf) { -- C2_MacroAssembler _masm(cbuf); -- Assembler::CompressibleRegion cr(&_masm); -- int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); -- if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { -- // stack to stack -- __ spill_copy_vector_stack_to_stack(src_offset, dst_offset, -- vector_reg_size_in_bytes); -- } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { -- // vpr to stack -- __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo)); -- } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { -- // stack to vpr -- __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); -- } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { -- // vpr to vpr -- __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); -- } else { -- ShouldNotReachHere(); -- } -- } -- } else if (cbuf != NULL) { -+ if (cbuf != NULL) { - C2_MacroAssembler _masm(cbuf); - Assembler::CompressibleRegion cr(&_masm); - switch (src_lo_rc) { -@@ -1619,17 +1324,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo - } else { - st->print("%s", Matcher::regName[dst_lo]); - } -- if (bottom_type()->isa_vect() != NULL) { -- int vsize = 0; -- if (ideal_reg() == Op_VecA) { -- vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; -- } else { -- ShouldNotReachHere(); -- } -- st->print("\t# vector spill size = %d", vsize); -- } else { -- st->print("\t# spill size = %d", is64 ? 64 : 32); -- } -+ st->print("\t# spill size = %d", is64 ? 
64 : 32); - } - - return 0; -@@ -1796,14 +1491,6 @@ const bool Matcher::match_rule_supported(int opcode) { - } - break; - -- case Op_StrCompressedCopy: // fall through -- case Op_StrInflatedCopy: // fall through -- case Op_CountPositives: -- return UseRVV; -- -- case Op_EncodeISOArray: -- return UseRVV && SpecialEncodeISOArray; -- - case Op_PopCountI: - case Op_PopCountL: - return UsePopCountInstruction; -@@ -1821,37 +1508,15 @@ const bool Matcher::match_rule_supported(int opcode) { - } - - // Identify extra cases that we might want to provide match rules for vector nodes and --// other intrinsics guarded with vector length (vlen) and element type (bt). --const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { -- if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -- return false; -- } -- -- return op_vec_supported(opcode); --} -- --const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { -+// other intrinsics guarded with vector length (vlen). -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { - return false; - } - --const RegMask* Matcher::predicate_reg_mask(void) { -- return NULL; --} -- --const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { -- return NULL; --} -- --// Vector calling convention not yet implemented. --const bool Matcher::supports_vector_calling_convention(void) { -+const bool Matcher::has_predicated_vectors(void) { - return false; - } - --OptoRegPair Matcher::vector_return_value(uint ideal_reg) { -- Unimplemented(); -- return OptoRegPair(0, 0); --} -- - // Is this branch offset short enough that a short branch can be used? - // - // NOTE: If the platform does not provide any short branch variants, then -@@ -1877,11 +1542,6 @@ bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { - - // Vector width in bytes. - const int Matcher::vector_width_in_bytes(BasicType bt) { -- if (UseRVV) { -- // The MaxVectorSize should have been set by detecting RVV max vector register size when check UseRVV. -- // MaxVectorSize == VM_Version::_initial_vector_length -- return MaxVectorSize; -- } - return 0; - } - -@@ -1895,34 +1555,10 @@ const int Matcher::min_vector_size(const BasicType bt) { - - // Vector ideal reg. - const uint Matcher::vector_ideal_reg(int len) { -- assert(MaxVectorSize >= len, ""); -- if (UseRVV) { -- return Op_VecA; -- } -- - ShouldNotReachHere(); - return 0; - } - --const int Matcher::scalable_vector_reg_size(const BasicType bt) { -- return Matcher::max_vector_size(bt); --} -- --MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) { -- ShouldNotReachHere(); // generic vector operands not supported -- return NULL; --} -- --bool Matcher::is_reg2reg_move(MachNode* m) { -- ShouldNotReachHere(); // generic vector operands not supported -- return false; --} -- --bool Matcher::is_generic_vector(MachOper* opnd) { -- ShouldNotReachHere(); // generic vector operands not supported -- return false; --} -- - // Return whether or not this register is ever used as an argument. - // This function is used on startup to build the trampoline stubs in - // generateOptoStub. Registers not mentioned will be killed by the VM -@@ -3384,67 +3020,6 @@ operand fRegD() - interface(REG_INTER); - %} - --// Generic vector class. This will be used for --// all vector operands. 
--operand vReg() --%{ -- constraint(ALLOC_IN_RC(vectora_reg)); -- match(VecA); -- op_cost(0); -- format %{ %} -- interface(REG_INTER); --%} -- --operand vReg_V1() --%{ -- constraint(ALLOC_IN_RC(v1_reg)); -- match(VecA); -- match(vReg); -- op_cost(0); -- format %{ %} -- interface(REG_INTER); --%} -- --operand vReg_V2() --%{ -- constraint(ALLOC_IN_RC(v2_reg)); -- match(VecA); -- match(vReg); -- op_cost(0); -- format %{ %} -- interface(REG_INTER); --%} -- --operand vReg_V3() --%{ -- constraint(ALLOC_IN_RC(v3_reg)); -- match(VecA); -- match(vReg); -- op_cost(0); -- format %{ %} -- interface(REG_INTER); --%} -- --operand vReg_V4() --%{ -- constraint(ALLOC_IN_RC(v4_reg)); -- match(VecA); -- match(vReg); -- op_cost(0); -- format %{ %} -- interface(REG_INTER); --%} -- --operand vReg_V5() --%{ -- constraint(ALLOC_IN_RC(v5_reg)); -- match(VecA); -- match(vReg); -- op_cost(0); -- format %{ %} -- interface(REG_INTER); --%} -- - // Java Thread Register - operand javaThread_RegP(iRegP reg) - %{ -@@ -7939,17 +7514,6 @@ instruct castDD(fRegD dst) - ins_pipe(pipe_class_empty); - %} - --instruct castVV(vReg dst) --%{ -- match(Set dst (CastVV dst)); -- -- size(0); -- format %{ "# castVV of $dst" %} -- ins_encode(/* empty encoding */); -- ins_cost(0); -- ins_pipe(pipe_class_empty); --%} -- - // ============================================================================ - // Convert Instructions - -@@ -10076,7 +9640,7 @@ instruct partialSubtypeCheckVsZero(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 su - instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, - iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); -+ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); - match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); - effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - -@@ -10094,7 +9658,7 @@ instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R - instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, - iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); -+ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); - match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); - effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - -@@ -10111,7 +9675,7 @@ instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R - instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, - iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); -+ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); - match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); - effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - -@@ -10129,7 +9693,7 @@ instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ - iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, - rFlagsReg cr) - %{ -- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); -+ predicate(((StrCompNode 
*)n)->encoding() == StrIntrinsicNode::LU); - match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); - effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - -@@ -10275,7 +9839,7 @@ instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, - iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) - %{ - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -- predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); - effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); - -@@ -10294,7 +9858,7 @@ instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, - iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) - %{ - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -- predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); - effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); - -@@ -10310,7 +9874,6 @@ instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, - // clearing of an array - instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) - %{ -- predicate(!UseRVV); - match(Set dummy (ClearArray cnt base)); - effect(USE_KILL cnt, USE_KILL base); - -@@ -10330,8 +9893,7 @@ instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) - - instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) - %{ -- predicate(!UseRVV && (uint64_t)n->in(2)->get_long() -- < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); -+ predicate((uint64_t)n->in(2)->get_long() < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); - match(Set dummy (ClearArray cnt base)); - effect(USE_KILL base, KILL cr); - -@@ -10348,7 +9910,7 @@ instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg - instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, - iRegI_R10 result, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); -+ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); - match(Set result (StrEquals (Binary str1 str2) cnt)); - effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); - -@@ -10364,7 +9926,7 @@ instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, - instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, - iRegI_R10 result, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); -+ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); - match(Set result (StrEquals (Binary str1 str2) cnt)); - effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); - -@@ -10381,7 +9943,7 @@ instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, - iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, - iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); -+ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); - match(Set result (AryEq ary1 ary2)); - effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); - -@@ -10398,7 +9960,7 @@ instruct array_equalsC(iRegP_R11 ary1, 
iRegP_R12 ary2, iRegI_R10 result, - iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, - iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); -+ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); - match(Set result (AryEq ary1 ary2)); - effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); - -diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad -deleted file mode 100644 -index 3828e096b21..00000000000 ---- a/src/hotspot/cpu/riscv/riscv_v.ad -+++ /dev/null -@@ -1,2065 +0,0 @@ --// --// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. --// Copyright (c) 2020, Arm Limited. All rights reserved. --// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. --// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. --// --// This code is free software; you can redistribute it and/or modify it --// under the terms of the GNU General Public License version 2 only, as --// published by the Free Software Foundation. --// --// This code is distributed in the hope that it will be useful, but WITHOUT --// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or --// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --// version 2 for more details (a copy is included in the LICENSE file that --// accompanied this code). --// --// You should have received a copy of the GNU General Public License version --// 2 along with this work; if not, write to the Free Software Foundation, --// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. --// --// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA --// or visit www.oracle.com if you need additional information or have any --// questions. 
--// --// -- --// RISCV Vector Extension Architecture Description File -- --opclass vmemA(indirect); -- --source_hpp %{ -- bool op_vec_supported(int opcode); --%} -- --source %{ -- -- static void loadStore(C2_MacroAssembler masm, bool is_store, -- VectorRegister reg, BasicType bt, Register base) { -- Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -- masm.vsetvli(t0, x0, sew); -- if (is_store) { -- masm.vsex_v(reg, base, sew); -- } else { -- masm.vlex_v(reg, base, sew); -- } -- } -- -- bool op_vec_supported(int opcode) { -- switch (opcode) { -- // No multiply reduction instructions -- case Op_MulReductionVD: -- case Op_MulReductionVF: -- case Op_MulReductionVI: -- case Op_MulReductionVL: -- // Others -- case Op_Extract: -- case Op_ExtractB: -- case Op_ExtractC: -- case Op_ExtractD: -- case Op_ExtractF: -- case Op_ExtractI: -- case Op_ExtractL: -- case Op_ExtractS: -- case Op_ExtractUB: -- // Vector API specific -- case Op_AndReductionV: -- case Op_OrReductionV: -- case Op_XorReductionV: -- case Op_LoadVectorGather: -- case Op_StoreVectorScatter: -- case Op_VectorBlend: -- case Op_VectorCast: -- case Op_VectorCastB2X: -- case Op_VectorCastD2X: -- case Op_VectorCastF2X: -- case Op_VectorCastI2X: -- case Op_VectorCastL2X: -- case Op_VectorCastS2X: -- case Op_VectorInsert: -- case Op_VectorLoadConst: -- case Op_VectorLoadMask: -- case Op_VectorLoadShuffle: -- case Op_VectorMaskCmp: -- case Op_VectorRearrange: -- case Op_VectorReinterpret: -- case Op_VectorStoreMask: -- case Op_VectorTest: -- return false; -- default: -- return UseRVV; -- } -- } -- --%} -- --definitions %{ -- int_def VEC_COST (200, 200); --%} -- --// All VEC instructions -- --// vector load/store --instruct loadV(vReg dst, vmemA mem) %{ -- match(Set dst (LoadVector mem)); -- ins_cost(VEC_COST); -- format %{ "vle $dst, $mem\t#@loadV" %} -- ins_encode %{ -- VectorRegister dst_reg = as_VectorRegister($dst$$reg); -- loadStore(C2_MacroAssembler(&cbuf), false, dst_reg, -- Matcher::vector_element_basic_type(this), as_Register($mem$$base)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct storeV(vReg src, vmemA mem) %{ -- match(Set mem (StoreVector mem src)); -- ins_cost(VEC_COST); -- format %{ "vse $src, $mem\t#@storeV" %} -- ins_encode %{ -- VectorRegister src_reg = as_VectorRegister($src$$reg); -- loadStore(C2_MacroAssembler(&cbuf), true, src_reg, -- Matcher::vector_element_basic_type(this, $src), as_Register($mem$$base)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector abs -- --instruct vabsB(vReg dst, vReg src, vReg tmp) %{ -- match(Set dst (AbsVB src)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t" -- "vmax.vv $dst, $tmp, $src" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vabsS(vReg dst, vReg src, vReg tmp) %{ -- match(Set dst (AbsVS src)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t" -- "vmax.vv $dst, $tmp, $src" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vabsI(vReg dst, vReg src, vReg tmp) %{ -- match(Set dst (AbsVI src)); -- 
ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t" -- "vmax.vv $dst, $tmp, $src" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vabsL(vReg dst, vReg src, vReg tmp) %{ -- match(Set dst (AbsVL src)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsL\n\t" -- "vmax.vv $dst, $tmp, $src" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vabsF(vReg dst, vReg src) %{ -- match(Set dst (AbsVF src)); -- ins_cost(VEC_COST); -- format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vabsD(vReg dst, vReg src) %{ -- match(Set dst (AbsVD src)); -- ins_cost(VEC_COST); -- format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector add -- --instruct vaddB(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (AddVB src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vadd_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vaddS(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (AddVS src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vadd_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vaddI(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (AddVI src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vadd_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vaddL(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (AddVL src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vadd_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vaddF(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (AddVF src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfadd_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vaddD(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst 
(AddVD src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfadd_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector and -- --instruct vand(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (AndV src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vand.vv $dst, $src1, $src2\t#@vand" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vand_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector or -- --instruct vor(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (OrV src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vor.vv $dst, $src1, $src2\t#@vor" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vor_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector xor -- --instruct vxor(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (XorV src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vxor_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector float div -- --instruct vdivF(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (DivVF src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfdiv_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vdivD(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (DivVD src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfdiv_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector integer max/min -- --instruct vmax(vReg dst, vReg src1, vReg src2) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && -- n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); -- match(Set dst (MaxV src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vmax.vv $dst, $src1, $src2\t#@vmax" %} -- ins_encode %{ -- BasicType bt = Matcher::vector_element_basic_type(this); -- Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -- __ vsetvli(t0, x0, sew); -- __ vmax_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmin(vReg dst, vReg src1, vReg src2) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && -- n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); -- match(Set dst (MinV src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vmin.vv $dst, $src1, $src2\t#@vmin" %} -- ins_encode %{ -- BasicType bt = Matcher::vector_element_basic_type(this); -- Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -- __ vsetvli(t0, x0, sew); -- __ vmin_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector float-point max/min -- --instruct 
vmaxF(vReg dst, vReg src1, vReg src2) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -- match(Set dst (MaxV src1 src2)); -- effect(TEMP_DEF dst); -- ins_cost(VEC_COST); -- format %{ "vmaxF $dst, $src1, $src2\t#@vmaxF" %} -- ins_encode %{ -- __ minmax_FD_v(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -- false /* is_double */, false /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmaxD(vReg dst, vReg src1, vReg src2) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -- match(Set dst (MaxV src1 src2)); -- effect(TEMP_DEF dst); -- ins_cost(VEC_COST); -- format %{ "vmaxD $dst, $src1, $src2\t#@vmaxD" %} -- ins_encode %{ -- __ minmax_FD_v(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -- true /* is_double */, false /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vminF(vReg dst, vReg src1, vReg src2) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -- match(Set dst (MinV src1 src2)); -- effect(TEMP_DEF dst); -- ins_cost(VEC_COST); -- format %{ "vminF $dst, $src1, $src2\t#@vminF" %} -- ins_encode %{ -- __ minmax_FD_v(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -- false /* is_double */, true /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vminD(vReg dst, vReg src1, vReg src2) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -- match(Set dst (MinV src1 src2)); -- effect(TEMP_DEF dst); -- ins_cost(VEC_COST); -- format %{ "vminD $dst, $src1, $src2\t#@vminD" %} -- ins_encode %{ -- __ minmax_FD_v(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -- true /* is_double */, true /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector fmla -- --// dst_src1 = dst_src1 + src2 * src3 --instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 + src2 * src3 --instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector fmls -- --// dst_src1 = dst_src1 + -src2 * src3 --// dst_src1 = dst_src1 + src2 * -src3 --instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); -- match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); -- ins_cost(VEC_COST); -- format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 + -src2 * src3 --// dst_src1 = dst_src1 + 
src2 * -src3 --instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); -- match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); -- ins_cost(VEC_COST); -- format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector fnmla -- --// dst_src1 = -dst_src1 + -src2 * src3 --// dst_src1 = -dst_src1 + src2 * -src3 --instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); -- match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); -- ins_cost(VEC_COST); -- format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = -dst_src1 + -src2 * src3 --// dst_src1 = -dst_src1 + src2 * -src3 --instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); -- match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); -- ins_cost(VEC_COST); -- format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector fnmls -- --// dst_src1 = -dst_src1 + src2 * src3 --instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = -dst_src1 + src2 * src3 --instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector mla -- --// dst_src1 = dst_src1 + src2 * src3 --instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 + src2 * src3 --instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ 
vmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 + src2 * src3 --instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 + src2 * src3 --instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector mls -- --// dst_src1 = dst_src1 - src2 * src3 --instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 - src2 * src3 --instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 - src2 * src3 --instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 - src2 * src3 --instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector mul -- --instruct vmulB(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (MulVB src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmulS(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (MulVS src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- 
as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmulI(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (MulVI src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmulL(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (MulVL src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmulF(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (MulVF src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmulD(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (MulVD src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector fneg -- --instruct vnegF(vReg dst, vReg src) %{ -- match(Set dst (NegVF src)); -- ins_cost(VEC_COST); -- format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vnegD(vReg dst, vReg src) %{ -- match(Set dst (NegVD src)); -- ins_cost(VEC_COST); -- format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// popcount vector -- --instruct vpopcountI(iRegINoSp dst, vReg src) %{ -- match(Set dst (PopCountVI src)); -- format %{ "vpopc.m $dst, $src\t#@vpopcountI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector add reduction -- --instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -- match(Set dst (AddReductionVI src1 src2)); -- effect(TEMP tmp); -- ins_cost(VEC_COST); -- format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t" -- "vredsum.vs $tmp, $src2, $tmp\n\t" -- "vmv.x.s $dst, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -- match(Set dst (AddReductionVI src1 src2)); -- effect(TEMP tmp); -- ins_cost(VEC_COST); -- format %{ "vmv.s.x $tmp, 
$src1\t#@reduce_addS\n\t" -- "vredsum.vs $tmp, $src2, $tmp\n\t" -- "vmv.x.s $dst, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -- match(Set dst (AddReductionVI src1 src2)); -- effect(TEMP tmp); -- ins_cost(VEC_COST); -- format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t" -- "vredsum.vs $tmp, $src2, $tmp\n\t" -- "vmv.x.s $dst, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -- match(Set dst (AddReductionVL src1 src2)); -- effect(TEMP tmp); -- ins_cost(VEC_COST); -- format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t" -- "vredsum.vs $tmp, $src2, $tmp\n\t" -- "vmv.x.s $dst, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{ -- match(Set src1_dst (AddReductionVF src1_dst src2)); -- effect(TEMP tmp); -- ins_cost(VEC_COST); -- format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t" -- "vfredosum.vs $tmp, $src2, $tmp\n\t" -- "vfmv.f.s $src1_dst, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -- __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp$$reg)); -- __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{ -- match(Set src1_dst (AddReductionVD src1_dst src2)); -- effect(TEMP tmp); -- ins_cost(VEC_COST); -- format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t" -- "vfredosum.vs $tmp, $src2, $tmp\n\t" -- "vfmv.f.s $src1_dst, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -- __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp$$reg)); -- __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector integer max reduction --instruct vreduce_maxB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -- match(Set dst (MaxReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_maxB $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vredmax_vs(as_VectorRegister($tmp$$reg), 
as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- Label Ldone; -- __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -- __ bind(Ldone); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_maxS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -- match(Set dst (MaxReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_maxS $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- Label Ldone; -- __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -- __ bind(Ldone); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -- match(Set dst (MaxReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_maxI $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -- match(Set dst (MaxReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_maxL $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector integer min reduction --instruct vreduce_minB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -- match(Set dst (MinReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_minB $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- Label Ldone; -- __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -- __ bind(Ldone); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_minS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -- match(Set dst (MinReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_minS $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -- __ 
vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- Label Ldone; -- __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -- __ bind(Ldone); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -- match(Set dst (MinReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_minI $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -- match(Set dst (MinReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_minL $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector float max reduction -- --instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -- match(Set dst (MaxReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -- format %{ "reduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %} -- ins_encode %{ -- __ reduce_minmax_FD_v($dst$$FloatRegister, -- $src1$$FloatRegister, as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -- false /* is_double */, false /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -- match(Set dst (MaxReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -- format %{ "reduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %} -- ins_encode %{ -- __ reduce_minmax_FD_v($dst$$FloatRegister, -- $src1$$FloatRegister, as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -- true /* is_double */, false /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector float min reduction -- --instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -- match(Set dst (MinReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -- format %{ "reduce_minF $dst, $src1, $src2, $tmp1, $tmp2" %} -- ins_encode %{ -- __ reduce_minmax_FD_v($dst$$FloatRegister, -- $src1$$FloatRegister, as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -- false /* is_double */, true /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ -- 
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -- match(Set dst (MinReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -- format %{ "reduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %} -- ins_encode %{ -- __ reduce_minmax_FD_v($dst$$FloatRegister, -- $src1$$FloatRegister, as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -- true /* is_double */, true /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector Math.rint, floor, ceil -- --instruct vroundD(vReg dst, vReg src, immI rmode) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -- match(Set dst (RoundDoubleModeV src rmode)); -- format %{ "vroundD $dst, $src, $rmode" %} -- ins_encode %{ -- switch ($rmode$$constant) { -- case RoundDoubleModeNode::rmode_rint: -- __ csrwi(CSR_FRM, C2_MacroAssembler::rne); -- __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -- break; -- case RoundDoubleModeNode::rmode_floor: -- __ csrwi(CSR_FRM, C2_MacroAssembler::rdn); -- __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -- break; -- case RoundDoubleModeNode::rmode_ceil: -- __ csrwi(CSR_FRM, C2_MacroAssembler::rup); -- __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -- break; -- default: -- ShouldNotReachHere(); -- break; -- } -- %} -- ins_pipe(pipe_slow); --%} -- --// vector replicate -- --instruct replicateB(vReg dst, iRegIorL2I src) %{ -- match(Set dst (ReplicateB src)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.x $dst, $src\t#@replicateB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateS(vReg dst, iRegIorL2I src) %{ -- match(Set dst (ReplicateS src)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.x $dst, $src\t#@replicateS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateI(vReg dst, iRegIorL2I src) %{ -- match(Set dst (ReplicateI src)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.x $dst, $src\t#@replicateI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateL(vReg dst, iRegL src) %{ -- match(Set dst (ReplicateL src)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.x $dst, $src\t#@replicateL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateB_imm5(vReg dst, immI5 con) %{ -- match(Set dst (ReplicateB con)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateS_imm5(vReg dst, immI5 con) %{ -- match(Set dst (ReplicateS con)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateI_imm5(vReg dst, immI5 con) %{ -- match(Set dst (ReplicateI con)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.i 
$dst, $con\t#@replicateI_imm5" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateL_imm5(vReg dst, immL5 con) %{ -- match(Set dst (ReplicateL con)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.i $dst, $con\t#@replicateL_imm5" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateF(vReg dst, fRegF src) %{ -- match(Set dst (ReplicateF src)); -- ins_cost(VEC_COST); -- format %{ "vfmv.v.f $dst, $src\t#@replicateF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateD(vReg dst, fRegD src) %{ -- match(Set dst (ReplicateD src)); -- ins_cost(VEC_COST); -- format %{ "vfmv.v.f $dst, $src\t#@replicateD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector shift -- --instruct vasrB(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (RShiftVB src shift)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst); -- format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t" -- "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t" -- "vmnot.m v0, v0\n\t" -- "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits -- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- BitsPerByte - 1, Assembler::v0_t); -- // otherwise, shift -- __ vmnot_m(v0, v0); -- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg), Assembler::v0_t); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrS(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (RShiftVS src shift)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst); -- format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t" -- "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t" -- "vmnot.m v0, v0\n\t" -- "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits -- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- BitsPerShort - 1, Assembler::v0_t); -- // otherwise, shift -- __ vmnot_m(v0, v0); -- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg), Assembler::v0_t); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrI(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (RShiftVI src shift)); -- ins_cost(VEC_COST); -- format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrL(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (RShiftVL src shift)); -- ins_cost(VEC_COST); -- format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg)); -- %} 
-- ins_pipe(pipe_slow); --%} -- --instruct vlslB(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (LShiftVB src shift)); -- ins_cost(VEC_COST); -- effect( TEMP_DEF dst); -- format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t" -- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -- "vmnot.m v0, v0\n\t" -- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- // if shift > BitsPerByte - 1, clear the element -- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg), Assembler::v0_t); -- // otherwise, shift -- __ vmnot_m(v0, v0); -- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg), Assembler::v0_t); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslS(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (LShiftVS src shift)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst); -- format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t" -- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -- "vmnot.m v0, v0\n\t" -- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- // if shift > BitsPerShort - 1, clear the element -- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg), Assembler::v0_t); -- // otherwise, shift -- __ vmnot_m(v0, v0); -- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg), Assembler::v0_t); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslI(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (LShiftVI src shift)); -- ins_cost(VEC_COST); -- format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslL(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (LShiftVL src shift)); -- ins_cost(VEC_COST); -- format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlsrB(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (URShiftVB src shift)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst); -- format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t" -- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -- "vmnot.m v0, v0, v0\n\t" -- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- // if shift > BitsPerByte - 1, clear the element -- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg), Assembler::v0_t); -- // otherwise, shift -- __ vmnot_m(v0, v0); -- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg), Assembler::v0_t); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlsrS(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (URShiftVS src shift)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst); -- format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t" -- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -- "vmnot.m v0, 
v0\n\t" -- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- // if shift > BitsPerShort - 1, clear the element -- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg), Assembler::v0_t); -- // otherwise, shift -- __ vmnot_m(v0, v0); -- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg), Assembler::v0_t); -- %} -- ins_pipe(pipe_slow); --%} -- -- --instruct vlsrI(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (URShiftVI src shift)); -- ins_cost(VEC_COST); -- format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- -- --instruct vlsrL(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (URShiftVL src shift)); -- ins_cost(VEC_COST); -- format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (RShiftVB src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e8); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- if (con >= BitsPerByte) con = BitsPerByte - 1; -- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (RShiftVS src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e16); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- if (con >= BitsPerShort) con = BitsPerShort - 1; -- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (RShiftVI src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e32); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ -- predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); -- match(Set dst (RShiftVL src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsrl.vi $dst, $src, $shift\t#@vasrL_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e64); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), 
-- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (URShiftVB src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e8); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- if (con >= BitsPerByte) { -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (URShiftVS src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e16); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- if (con >= BitsPerShort) { -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (URShiftVI src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e32); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ -- predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); -- match(Set dst (URShiftVL src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e64); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (LShiftVB src (LShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e8); -- if (con >= BitsPerByte) { -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (LShiftVS src (LShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %} -- ins_encode %{ -- 
uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e16); -- if (con >= BitsPerShort) { -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (LShiftVI src (LShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e32); -- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ -- predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); -- match(Set dst (LShiftVL src (LShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e64); -- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -- match(Set dst (LShiftCntV cnt)); -- match(Set dst (RShiftCntV cnt)); -- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || -- n->bottom_type()->is_vect()->element_basic_type() == T_CHAR); -- match(Set dst (LShiftCntV cnt)); -- match(Set dst (RShiftCntV cnt)); -- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); -- match(Set dst (LShiftCntV cnt)); -- match(Set dst (RShiftCntV cnt)); -- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG); -- match(Set dst (LShiftCntV cnt)); -- match(Set dst (RShiftCntV cnt)); -- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector sqrt -- --instruct vsqrtF(vReg dst, vReg src) %{ -- match(Set dst (SqrtVF src)); -- ins_cost(VEC_COST); -- format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vsqrtD(vReg dst, vReg src) %{ -- match(Set dst (SqrtVD src)); -- ins_cost(VEC_COST); -- format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfsqrt_v(as_VectorRegister($dst$$reg), 
as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector sub -- --instruct vsubB(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (SubVB src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vsubS(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (SubVS src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vsubI(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (SubVI src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vsubL(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (SubVL src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vsubF(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (SubVF src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfsub.vv $dst, $src1, $src2\t@vsubF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vsubD(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (SubVD src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -- iRegI_R10 result, vReg_V1 v1, -- vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) --%{ -- predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); -- match(Set result (StrEquals (Binary str1 str2) cnt)); -- effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); -- -- format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} -- ins_encode %{ -- // Count is in 8-bit bytes; non-Compact chars are 16 bits. -- __ string_equals_v($str1$$Register, $str2$$Register, -- $result$$Register, $cnt$$Register, 1); -- %} -- ins_pipe(pipe_class_memory); --%} -- --instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -- iRegI_R10 result, vReg_V1 v1, -- vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) --%{ -- predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); -- match(Set result (StrEquals (Binary str1 str2) cnt)); -- effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); -- -- format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} -- ins_encode %{ -- // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-- __ string_equals_v($str1$$Register, $str2$$Register, -- $result$$Register, $cnt$$Register, 2); -- %} -- ins_pipe(pipe_class_memory); --%} -- --instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) --%{ -- predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); -- match(Set result (AryEq ary1 ary2)); -- effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); -- -- format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} -- ins_encode %{ -- __ arrays_equals_v($ary1$$Register, $ary2$$Register, -- $result$$Register, $tmp$$Register, 1); -- %} -- ins_pipe(pipe_class_memory); --%} -- --instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) --%{ -- predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); -- match(Set result (AryEq ary1 ary2)); -- effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); -- -- format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} -- ins_encode %{ -- __ arrays_equals_v($ary1$$Register, $ary2$$Register, -- $result$$Register, $tmp$$Register, 2); -- %} -- ins_pipe(pipe_class_memory); --%} -- --instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -- iRegP_R28 tmp1, iRegL_R29 tmp2) --%{ -- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); -- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); -- -- format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} -- ins_encode %{ -- // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-- __ string_compare_v($str1$$Register, $str2$$Register, -- $cnt1$$Register, $cnt2$$Register, $result$$Register, -- $tmp1$$Register, $tmp2$$Register, -- StrIntrinsicNode::UU); -- %} -- ins_pipe(pipe_class_memory); --%} --instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -- iRegP_R28 tmp1, iRegL_R29 tmp2) --%{ -- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); -- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); -- -- format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} -- ins_encode %{ -- __ string_compare_v($str1$$Register, $str2$$Register, -- $cnt1$$Register, $cnt2$$Register, $result$$Register, -- $tmp1$$Register, $tmp2$$Register, -- StrIntrinsicNode::LL); -- %} -- ins_pipe(pipe_class_memory); --%} -- --instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -- iRegP_R28 tmp1, iRegL_R29 tmp2) --%{ -- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); -- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); -- -- format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} -- ins_encode %{ -- __ string_compare_v($str1$$Register, $str2$$Register, -- $cnt1$$Register, $cnt2$$Register, $result$$Register, -- $tmp1$$Register, $tmp2$$Register, -- StrIntrinsicNode::UL); -- %} -- ins_pipe(pipe_class_memory); --%} --instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -- iRegP_R28 tmp1, iRegL_R29 tmp2) --%{ -- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); -- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); -- -- format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} -- ins_encode %{ -- __ string_compare_v($str1$$Register, $str2$$Register, -- $cnt1$$Register, $cnt2$$Register, $result$$Register, -- $tmp1$$Register, $tmp2$$Register, -- StrIntrinsicNode::LU); -- %} -- ins_pipe(pipe_class_memory); --%} -- --// fast byte[] to char[] inflation --instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) --%{ -- predicate(UseRVV); -- match(Set dummy (StrInflatedCopy src (Binary dst len))); -- effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); -- -- format %{ "String Inflate $src,$dst" %} -- ins_encode %{ -- __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); -- %} -- ins_pipe(pipe_class_memory); --%} -- --// encode char[] to byte[] in ISO_8859_1 --instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) --%{ -- predicate(UseRVV); -- match(Set result (EncodeISOArray src (Binary dst len))); -- 
effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -- TEMP v1, TEMP v2, TEMP v3, TEMP tmp); -- -- format %{ "Encode array $src,$dst,$len -> $result" %} -- ins_encode %{ -- __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register, -- $result$$Register, $tmp$$Register); -- %} -- ins_pipe( pipe_class_memory ); --%} -- --// fast char[] to byte[] compression --instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) --%{ -- predicate(UseRVV); -- match(Set result (StrCompressedCopy src (Binary dst len))); -- effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -- TEMP v1, TEMP v2, TEMP v3, TEMP tmp); -- -- format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %} -- ins_encode %{ -- __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register, -- $result$$Register, $tmp$$Register); -- %} -- ins_pipe( pipe_slow ); --%} -- --instruct vcount_positives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result, iRegL tmp) --%{ -- predicate(UseRVV); -- match(Set result (CountPositives ary len)); -- effect(USE_KILL ary, USE_KILL len, TEMP tmp); -- -- format %{ "count positives byte[] $ary, $len -> $result" %} -- ins_encode %{ -- __ count_positives_v($ary$$Register, $len$$Register, $result$$Register, $tmp$$Register); -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -- iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) --%{ -- predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); -- match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -- effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, -- TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); -- -- format %{ "StringUTF16 IndexOf char[] $str1, $cnt1, $ch -> $result" %} -- -- ins_encode %{ -- __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, -- $result$$Register, $tmp1$$Register, $tmp2$$Register, -- false /* isL */); -- %} -- -- ins_pipe(pipe_class_memory); --%} -- --instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -- iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) --%{ -- predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -- match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -- effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, -- TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); -- -- format %{ "StringLatin1 IndexOf char[] $str1, $cnt1, $ch -> $result" %} -- -- ins_encode %{ -- __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, -- $result$$Register, $tmp1$$Register, $tmp2$$Register, -- true /* isL */); -- %} -- -- ins_pipe(pipe_class_memory); --%} -- --// clearing of an array --instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, -- vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3) --%{ -- predicate(UseRVV); -- match(Set dummy (ClearArray cnt base)); -- effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3); -- -- format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} -- -- ins_encode %{ -- __ clear_array_v($base$$Register, $cnt$$Register); -- %} -- -- ins_pipe(pipe_class_memory); --%} -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index f85d4b25a76..4daed17df10 100644 ---- 
a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -80,9 +80,8 @@ class SimpleRuntimeFrame { - }; - - class RegisterSaver { -- const bool _save_vectors; - public: -- RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {} -+ RegisterSaver() {} - ~RegisterSaver() {} - OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); - void restore_live_registers(MacroAssembler* masm); -@@ -91,11 +90,7 @@ class RegisterSaver { - // Used by deoptimization when it is managing result register - // values on its own - // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) -- // |---v0---|<---SP -- // |---v1---|save vectors only in generate_handler_blob -- // |-- .. --| -- // |---v31--|----- -- // |---f0---| -+ // |---f0---|<---SP - // |---f1---| - // | .. | - // |---f31--| -@@ -106,16 +101,8 @@ class RegisterSaver { - // |---x31--| - // |---fp---| - // |---ra---| -- int v0_offset_in_bytes(void) { return 0; } - int f0_offset_in_bytes(void) { -- int f0_offset = 0; --#ifdef COMPILER2 -- if (_save_vectors) { -- f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers * -- BytesPerInt; -- } --#endif -- return f0_offset; -+ return 0; - } - int reserved_slot_offset_in_bytes(void) { - return f0_offset_in_bytes() + -@@ -142,15 +129,6 @@ class RegisterSaver { - }; - - OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { -- int vector_size_in_bytes = 0; -- int vector_size_in_slots = 0; --#ifdef COMPILER2 -- if (_save_vectors) { -- vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE); -- vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT); -- } --#endif -- - assert_cond(masm != NULL && total_frame_words != NULL); - int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); - // OopMap frame size is in compiler stack slots (jint's) not bytes or words -@@ -161,9 +139,9 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ - int frame_size_in_words = frame_size_in_bytes / wordSize; - *total_frame_words = frame_size_in_words; - -- // Save Integer, Float and Vector registers. -+ // Save Integer and Float registers. - __ enter(); -- __ push_CPU_state(_save_vectors, vector_size_in_bytes); -+ __ push_CPU_state(); - - // Set an oopmap for the call site. This oopmap will map all - // oop-registers and debug-info registers as callee-saved. 
This -@@ -176,13 +154,6 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ - - int sp_offset_in_slots = 0; - int step_in_slots = 0; -- if (_save_vectors) { -- step_in_slots = vector_size_in_slots; -- for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -- VectorRegister r = as_VectorRegister(i); -- oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); -- } -- } - - step_in_slots = FloatRegisterImpl::max_slots_per_register; - for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -@@ -207,18 +178,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ - - void RegisterSaver::restore_live_registers(MacroAssembler* masm) { - assert_cond(masm != NULL); --#ifdef COMPILER2 -- __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE)); --#else -- __ pop_CPU_state(_save_vectors); --#endif -+ __ pop_CPU_state(); - __ leave(); - } - - // Is vector's size (in bytes) bigger than a size saved by default? --// riscv does not ovlerlay the floating-point registers on vector registers like aarch64. - bool SharedRuntime::is_wide_vector(int size) { -- return UseRVV; -+ return false; - } - - // The java_calling_convention describes stack locations as ideal slots on -@@ -674,13 +640,6 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm - return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); - } - --int SharedRuntime::vector_calling_convention(VMRegPair *regs, -- uint num_bits, -- uint total_args_passed) { -- Unimplemented(); -- return 0; --} -- - int SharedRuntime::c_calling_convention(const BasicType *sig_bt, - VMRegPair *regs, - VMRegPair *regs2, -@@ -1891,7 +1850,7 @@ void SharedRuntime::generate_deopt_blob() { - OopMap* map = NULL; - OopMapSet *oop_maps = new OopMapSet(); - assert_cond(masm != NULL && oop_maps != NULL); -- RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0); -+ RegisterSaver reg_saver; - - // ------------- - // This code enters when returning to a de-optimized nmethod. A return -@@ -2423,7 +2382,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t - address call_pc = NULL; - int frame_size_in_words = -1; - bool cause_return = (poll_type == POLL_AT_RETURN); -- RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); -+ RegisterSaver reg_saver; - - // Save Integer and Float registers. 
- map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); -@@ -2542,7 +2501,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha - assert_cond(masm != NULL); - - int frame_size_in_words = -1; -- RegisterSaver reg_saver(false /* save_vectors */); -+ RegisterSaver reg_saver; - - OopMapSet *oop_maps = new OopMapSet(); - assert_cond(oop_maps != NULL); -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -index b05edf7172c..39416441bdf 100644 ---- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -2843,111 +2843,6 @@ class StubGenerator: public StubCodeGenerator { - - return entry; - } -- -- // Arguments: -- // -- // Input: -- // c_rarg0 - newArr address -- // c_rarg1 - oldArr address -- // c_rarg2 - newIdx -- // c_rarg3 - shiftCount -- // c_rarg4 - numIter -- // -- address generate_bigIntegerLeftShift() { -- __ align(CodeEntryAlignment); -- StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker"); -- address entry = __ pc(); -- -- Label loop, exit; -- -- Register newArr = c_rarg0; -- Register oldArr = c_rarg1; -- Register newIdx = c_rarg2; -- Register shiftCount = c_rarg3; -- Register numIter = c_rarg4; -- -- Register shiftRevCount = c_rarg5; -- Register oldArrNext = t1; -- -- __ beqz(numIter, exit); -- __ shadd(newArr, newIdx, newArr, t0, 2); -- -- __ li(shiftRevCount, 32); -- __ sub(shiftRevCount, shiftRevCount, shiftCount); -- -- __ bind(loop); -- __ addi(oldArrNext, oldArr, 4); -- __ vsetvli(t0, numIter, Assembler::e32, Assembler::m4); -- __ vle32_v(v0, oldArr); -- __ vle32_v(v4, oldArrNext); -- __ vsll_vx(v0, v0, shiftCount); -- __ vsrl_vx(v4, v4, shiftRevCount); -- __ vor_vv(v0, v0, v4); -- __ vse32_v(v0, newArr); -- __ sub(numIter, numIter, t0); -- __ shadd(oldArr, t0, oldArr, t1, 2); -- __ shadd(newArr, t0, newArr, t1, 2); -- __ bnez(numIter, loop); -- -- __ bind(exit); -- __ ret(); -- -- return entry; -- } -- -- // Arguments: -- // -- // Input: -- // c_rarg0 - newArr address -- // c_rarg1 - oldArr address -- // c_rarg2 - newIdx -- // c_rarg3 - shiftCount -- // c_rarg4 - numIter -- // -- address generate_bigIntegerRightShift() { -- __ align(CodeEntryAlignment); -- StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker"); -- address entry = __ pc(); -- -- Label loop, exit; -- -- Register newArr = c_rarg0; -- Register oldArr = c_rarg1; -- Register newIdx = c_rarg2; -- Register shiftCount = c_rarg3; -- Register numIter = c_rarg4; -- Register idx = numIter; -- -- Register shiftRevCount = c_rarg5; -- Register oldArrNext = c_rarg6; -- Register newArrCur = t0; -- Register oldArrCur = t1; -- -- __ beqz(idx, exit); -- __ shadd(newArr, newIdx, newArr, t0, 2); -- -- __ li(shiftRevCount, 32); -- __ sub(shiftRevCount, shiftRevCount, shiftCount); -- -- __ bind(loop); -- __ vsetvli(t0, idx, Assembler::e32, Assembler::m4); -- __ sub(idx, idx, t0); -- __ shadd(oldArrNext, idx, oldArr, t1, 2); -- __ shadd(newArrCur, idx, newArr, t1, 2); -- __ addi(oldArrCur, oldArrNext, 4); -- __ vle32_v(v0, oldArrCur); -- __ vle32_v(v4, oldArrNext); -- __ vsrl_vx(v0, v0, shiftCount); -- __ vsll_vx(v4, v4, shiftRevCount); -- __ vor_vv(v0, v0, v4); -- __ vse32_v(v0, newArrCur); -- __ bnez(idx, loop); -- -- __ bind(exit); -- __ ret(); -- -- return entry; -- } - #endif - - #ifdef COMPILER2 -@@ -3813,11 +3708,6 @@ class StubGenerator: public StubCodeGenerator { - MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); - 
StubRoutines::_montgomerySquare = g.generate_square(); - } -- -- if (UseRVVForBigIntegerShiftIntrinsics) { -- StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift(); -- StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift(); -- } - #endif - - generate_compare_long_strings(); -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -index 768c7633ca6..2c15a834542 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -167,10 +167,6 @@ void VM_Version::c2_initialize() { - FLAG_SET_DEFAULT(MaxVectorSize, 0); - } - -- if (!UseRVV) { -- FLAG_SET_DEFAULT(UseRVVForBigIntegerShiftIntrinsics, false); -- } -- - if (UseRVV) { - if (FLAG_IS_DEFAULT(MaxVectorSize)) { - MaxVectorSize = _initial_vector_length; -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -index aa7222dc64a..1f6eff96cba 100644 ---- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -@@ -45,16 +45,8 @@ void VMRegImpl::set_regName() { - freg = freg->successor(); - } - -- VectorRegister vreg = ::as_VectorRegister(0); -- for ( ; i < ConcreteRegisterImpl::max_vpr ; ) { -- for (int j = 0 ; j < VectorRegisterImpl::max_slots_per_register ; j++) { -- regName[i++] = reg->name(); -- } -- vreg = vreg->successor(); -- } -- - for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) { -- regName[i] = "NON-GPR-FPR-VPR"; -+ regName[i] = "NON-GPR-FPR"; - } - } - -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp -index 9e611b1f671..6f613a8f11a 100644 ---- a/src/hotspot/cpu/riscv/vmreg_riscv.hpp -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp -@@ -34,10 +34,6 @@ inline bool is_FloatRegister() { - return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; - } - --inline bool is_VectorRegister() { -- return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr; --} -- - inline Register as_Register() { - assert(is_Register(), "must be"); - return ::as_Register(value() / RegisterImpl::max_slots_per_register); -@@ -49,20 +45,9 @@ inline FloatRegister as_FloatRegister() { - FloatRegisterImpl::max_slots_per_register); - } - --inline VectorRegister as_VectorRegister() { -- assert(is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be"); -- return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / -- VectorRegisterImpl::max_slots_per_register); --} -- - inline bool is_concrete() { - assert(is_reg(), "must be"); -- if (is_VectorRegister()) { -- int base = value() - ConcreteRegisterImpl::max_fpr; -- return (base % VectorRegisterImpl::max_slots_per_register) == 0; -- } else { -- return is_even(value()); -- } -+ return is_even(value()); - } - - #endif // CPU_RISCV_VMREG_RISCV_HPP - -From b2011bad9b7404c1f6d0c1aa3176569d7f07d7a9 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Mon, 27 Mar 2023 16:05:55 +0800 -Subject: [PATCH 004/140] Revert: JDK-8253180: ZGC: Implementation of JEP 376: - ZGC: Concurrent Thread-Stack Processing JDK-8220051: Remove global safepoint - code - ---- - src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 14 ------ - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 14 +++--- - .../riscv/c2_safepointPollStubTable_riscv.cpp | 47 ------------------ - src/hotspot/cpu/riscv/frame_riscv.cpp | 9 +--- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 19 +------- - 
.../cpu/riscv/macroAssembler_riscv.cpp | 48 +++++++++++-------- - .../cpu/riscv/macroAssembler_riscv.hpp | 5 +- - src/hotspot/cpu/riscv/riscv.ad | 14 ++---- - src/hotspot/cpu/riscv/vm_version_riscv.hpp | 2 - - 9 files changed, 45 insertions(+), 127 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp - -diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -index dcd0472c540..af7bd067f33 100644 ---- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -@@ -39,20 +39,6 @@ - - #define __ ce->masm()-> - --void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { -- __ bind(_entry); -- InternalAddress safepoint_pc(__ pc() - __ offset() + safepoint_offset()); -- __ code_section()->relocate(__ pc(), safepoint_pc.rspec()); -- __ la(t0, safepoint_pc.target()); -- __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); -- -- assert(SharedRuntime::polling_page_return_handler_blob() != NULL, -- "polling page return stub not created yet"); -- address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); -- -- __ far_jump(RuntimeAddress(stub)); --} -- - void CounterOverflowStub::emit_code(LIR_Assembler* ce) { - __ bind(_entry); - Metadata *m = _method->as_constant_ptr()->as_metadata(); -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index bba3bd4709c..0e383a3c139 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -424,7 +424,7 @@ int LIR_Assembler::emit_deopt_handler() { - return offset; - } - --void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { -+void LIR_Assembler::return_op(LIR_Opr result) { - assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == x10, "word returns are in x10"); - - // Pop the stack before the safepoint code -@@ -434,18 +434,20 @@ void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { - __ reserved_stack_check(); - } - -- code_stub->set_safepoint_offset(__ offset()); -- __ relocate(relocInfo::poll_return_type); -- __ safepoint_poll(*code_stub->entry(), true /* at_return */, false /* acquire */, true /* in_nmethod */); -+ address polling_page(os::get_polling_page()); -+ __ read_polling_page(t0, polling_page, relocInfo::poll_return_type); - __ ret(); - } - - int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { -+ address polling_page(os::get_polling_page()); - guarantee(info != NULL, "Shouldn't be NULL"); -- __ get_polling_page(t0, relocInfo::poll_type); -+ assert(os::is_poll_address(polling_page), "should be"); -+ int32_t offset = 0; -+ __ get_polling_page(t0, polling_page, offset, relocInfo::poll_type); - add_debug_info_for_branch(info); // This isn't just debug info: - // it's the oop map -- __ read_polling_page(t0, 0, relocInfo::poll_type); -+ __ read_polling_page(t0, offset, relocInfo::poll_type); - return __ offset(); - } - -diff --git a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp -deleted file mode 100644 -index a90d9fdc160..00000000000 ---- a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp -+++ /dev/null -@@ -1,47 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
-- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#include "precompiled.hpp" --#include "asm/macroAssembler.hpp" --#include "opto/compile.hpp" --#include "opto/node.hpp" --#include "opto/output.hpp" --#include "runtime/sharedRuntime.hpp" -- --#define __ masm. --void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { -- assert(SharedRuntime::polling_page_return_handler_blob() != NULL, -- "polling page return stub not created yet"); -- address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); -- RuntimeAddress callback_addr(stub); -- -- __ bind(entry->_stub_label); -- InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); -- masm.code_section()->relocate(masm.pc(), safepoint_pc.rspec()); -- __ la(t0, safepoint_pc.target()); -- __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); -- __ far_jump(callback_addr); --} --#undef __ -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 6e38960598a..41e52a4d491 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -39,7 +39,6 @@ - #include "runtime/monitorChunk.hpp" - #include "runtime/os.inline.hpp" - #include "runtime/signature.hpp" --#include "runtime/stackWatermarkSet.hpp" - #include "runtime/stubCodeGenerator.hpp" - #include "runtime/stubRoutines.hpp" - #include "vmreg_riscv.inline.hpp" -@@ -509,13 +508,7 @@ frame frame::sender_raw(RegisterMap* map) const { - } - - frame frame::sender(RegisterMap* map) const { -- frame result = sender_raw(map); -- -- if (map->process_frames()) { -- StackWatermarkSet::on_iteration(map->thread(), result); -- } -- -- return result; -+ return sender_raw(map); - } - - bool frame::is_interpreted_frame_valid(JavaThread* thread) const { -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index d12dcb2af19..9090ad0c058 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -519,7 +519,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, - - if (needs_thread_local_poll) { - NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); -- ld(t1, Address(xthread, JavaThread::polling_word_offset())); -+ ld(t1, Address(xthread, Thread::polling_page_offset())); - andi(t1, t1, SafepointMechanism::poll_bit()); - bnez(t1, safepoint); - } -@@ -591,23 +591,6 @@ void InterpreterMacroAssembler::remove_activation( - // result check if synchronized method - 
Label unlocked, unlock, no_unlock; - -- // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, -- // that would normally not be safe to use. Such bad returns into unsafe territory of -- // the stack, will call InterpreterRuntime::at_unwind. -- Label slow_path; -- Label fast_path; -- safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); -- j(fast_path); -- -- bind(slow_path); -- push(state); -- set_last_Java_frame(esp, fp, (address)pc(), t0); -- super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), xthread); -- reset_last_Java_frame(true); -- pop(state); -- -- bind(fast_path); -- - // get the value of _do_not_unlock_if_synchronized into x13 - const Address do_not_unlock_if_synchronized(xthread, - in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 8b8d126f6c9..4b6136ae36b 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -2122,15 +2122,16 @@ void MacroAssembler::check_klass_subtype(Register sub_klass, - } - - void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { -- ld(t0, Address(xthread, JavaThread::polling_word_offset())); -- if (acquire) { -- membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -- } -- if (at_return) { -- bgtu(in_nmethod ? sp : fp, t0, slow_path, true /* is_far */); -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ ld(t1, Address(xthread, Thread::polling_page_offset())); -+ andi(t0, t1, SafepointMechanism::poll_bit()); -+ bnez(t0, slow_path); - } else { -- andi(t0, t0, SafepointMechanism::poll_bit()); -- bnez(t0, slow_path, true /* is_far */); -+ int32_t offset = 0; -+ la_patchable(t0, ExternalAddress(SafepointSynchronize::address_of_state()), offset); -+ lwu(t0, Address(t0, offset)); -+ assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); -+ bnez(t0, slow_path); - } - } - -@@ -2752,22 +2753,29 @@ void MacroAssembler::reserved_stack_check() { - } - - // Move the address of the polling page into dest. --void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { -- ld(dest, Address(xthread, JavaThread::polling_page_offset())); -+void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ ld(dest, Address(xthread, Thread::polling_page_offset())); -+ } else { -+ uint64_t align = (uint64_t)page & 0xfff; -+ assert(align == 0, "polling page must be page aligned"); -+ la_patchable(dest, Address(page, rtype), offset); -+ } - } - - // Read the polling page. The address of the polling page must - // already be in r. --address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { -- address mark; -- { -- InstructionMark im(this); -- code_section()->relocate(inst_mark(), rtype); -- lwu(zr, Address(r, offset)); -- mark = inst_mark(); -- } -- verify_cross_modify_fence_not_required(); -- return mark; -+void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { -+ int32_t offset = 0; -+ get_polling_page(dest, page, offset, rtype); -+ read_polling_page(dest, offset, rtype); -+} -+ -+// Read the polling page. The address of the polling page must -+// already be in r. 
-+void MacroAssembler::read_polling_page(Register dest, int32_t offset, relocInfo::relocType rtype) { -+ code_section()->relocate(pc(), rtype); -+ lwu(zr, Address(dest, offset)); - } - - void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index b43131514c1..041c696add6 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -625,8 +625,9 @@ class MacroAssembler: public Assembler { - - void reserved_stack_check(); - -- void get_polling_page(Register dest, relocInfo::relocType rtype); -- address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); -+ void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); -+ void read_polling_page(Register r, address page, relocInfo::relocType rtype); -+ void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); - - address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); - address ic_call(address entry, jint method_index = 0); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 85593a942e9..996fa1fb68f 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1132,9 +1132,9 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - } - - if (do_polling() && C->is_method_compilation()) { -- st->print("# test polling word\n\t"); -- st->print("ld t0, [xthread,#%d]\n\t", in_bytes(JavaThread::polling_word_offset())); -- st->print("bgtu sp, t0, #slow_path"); -+ st->print("# touch polling page\n\t"); -+ st->print("li t0, #0x%lx\n\t", p2i(os::get_polling_page())); -+ st->print("ld zr, [t0]"); - } - } - #endif -@@ -1153,13 +1153,7 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - } - - if (do_polling() && C->is_method_compilation()) { -- Label dummy_label; -- Label* code_stub = &dummy_label; -- if (!C->output()->in_scratch_emit_size()) { -- code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); -- } -- __ relocate(relocInfo::poll_return_type); -- __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */); -+ __ read_polling_page(t0, os::get_polling_page(), relocInfo::poll_return_type); - } - } - -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp -index 8e35530359a..7586af01d99 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp -@@ -48,8 +48,6 @@ class VM_Version : public Abstract_VM_Version { - // Initialization - static void initialize(); - -- constexpr static bool supports_stack_watermark_barrier() { return true; } -- - enum Feature_Flag { - #define CPU_FEATURE_FLAGS(decl) \ - decl(I, "i", 8) \ - -From a032c615883fe2bd557baf40f1439cbae55be206 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Mon, 1 May 2023 15:42:09 +0800 -Subject: [PATCH 005/140] Revert JDK-8221554: aarch64 cross-modifying code - ---- - .../cpu/riscv/macroAssembler_riscv.cpp | 22 ------------------- - .../cpu/riscv/macroAssembler_riscv.hpp | 2 -- - 2 files changed, 24 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 4b6136ae36b..269d76ba69e 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -2716,7 
+2716,6 @@ void MacroAssembler::build_frame(int framesize) { - sd(fp, Address(sp, framesize - 2 * wordSize)); - sd(ra, Address(sp, framesize - wordSize)); - if (PreserveFramePointer) { add(fp, sp, framesize); } -- verify_cross_modify_fence_not_required(); - } - - void MacroAssembler::remove_frame(int framesize) { -@@ -3935,26 +3934,5 @@ void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Registe - - void MacroAssembler::safepoint_ifence() { - ifence(); --#ifndef PRODUCT -- if (VerifyCrossModifyFence) { -- // Clear the thread state. -- sb(zr, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); -- } --#endif - } - --#ifndef PRODUCT --void MacroAssembler::verify_cross_modify_fence_not_required() { -- if (VerifyCrossModifyFence) { -- // Check if thread needs a cross modify fence. -- lbu(t0, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); -- Label fence_not_required; -- beqz(t0, fence_not_required); -- // If it does then fail. -- la(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure))); -- mv(c_rarg0, xthread); -- jalr(t0); -- bind(fence_not_required); -- } --} --#endif -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 041c696add6..b59bdadb8bf 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -821,8 +821,6 @@ class MacroAssembler: public Assembler { - void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); - void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); - -- // Check the current thread doesn't need a cross modify fence. 
-- void verify_cross_modify_fence_not_required() PRODUCT_RETURN; - }; - - #ifdef ASSERT - -From fd89cf689015649a5cb850e1e24dcbb7bb59735a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:11:30 +0800 -Subject: [PATCH 006/140] Revert JDK-8242263: Diagnose synchronization on - primitive wrappers - ---- - src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp | 7 ------- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 7 ------- - src/hotspot/cpu/riscv/riscv.ad | 7 ------- - 3 files changed, 21 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index 6f656c8c533..348546a9ea0 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -64,13 +64,6 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr - - null_check_offset = offset(); - -- if (DiagnoseSyncOnValueBasedClasses != 0) { -- load_klass(hdr, obj); -- lwu(hdr, Address(hdr, Klass::access_flags_offset())); -- andi(t0, hdr, JVM_ACC_IS_VALUE_BASED_CLASS); -- bnez(t0, slow_case, true /* is_far */); -- } -- - // Load object header - ld(hdr, Address(obj, hdr_offset)); - // and mark it as unlocked -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 9090ad0c058..8adc7b1320d 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -782,13 +782,6 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) - // Load object pointer into obj_reg c_rarg3 - ld(obj_reg, Address(lock_reg, obj_offset)); - -- if (DiagnoseSyncOnValueBasedClasses != 0) { -- load_klass(tmp, obj_reg); -- lwu(tmp, Address(tmp, Klass::access_flags_offset())); -- andi(tmp, tmp, JVM_ACC_IS_VALUE_BASED_CLASS); -- bnez(tmp, slow_case); -- } -- - // Load (object->mark() | 1) into swap_reg - ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - ori(swap_reg, t0, 1); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 996fa1fb68f..2eefc71dde0 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1965,13 +1965,6 @@ encode %{ - // Load markWord from object into displaced_header. 
- __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); - -- if (DiagnoseSyncOnValueBasedClasses != 0) { -- __ load_klass(flag, oop); -- __ lwu(flag, Address(flag, Klass::access_flags_offset())); -- __ andi(flag, flag, JVM_ACC_IS_VALUE_BASED_CLASS, tmp /* tmp */); -- __ bnez(flag, cont, true /* is_far */); -- } -- - // Check for existing monitor - __ andi(t0, disp_hdr, markWord::monitor_value); - __ bnez(t0, object_has_monitor); - -From feea78c5a227c0a57e57d6d1d544a14682310053 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:24:12 +0800 -Subject: [PATCH 007/140] Revert JDK-8278104: C1 should support the compiler - directive 'BreakAtExecute' - ---- - src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index 348546a9ea0..e5ed25616d6 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -311,7 +311,7 @@ void C1_MacroAssembler::remove_frame(int framesize) { - } - - --void C1_MacroAssembler::verified_entry(bool breakAtEntry) { -+void C1_MacroAssembler::verified_entry() { - // If we have to make this method not-entrant we'll overwrite its - // first instruction with a jump. For this action to be legal we - // must ensure that this first instruction is a J, JAL or NOP. - -From 651009a5783f6f5150b3e75a50069dc841622d33 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 15:57:14 +0800 -Subject: [PATCH 008/140] Revert: JDK-8234562: Move - OrderAccess::release_store*/load_acquire to Atomic JDK-8234736: Harmonize - parameter order in Atomic - store JDK-8234737: Harmonize parameter order in - Atomic - add JDK-8234740: Harmonize parameter order in Atomic - cmpxchg - JDK-8234739: Harmonize parameter order in Atomic - xchg JDK-8236778: Add - Atomic::fetch_and_add - ---- - .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 51 +++++++------------ - .../linux_riscv/orderAccess_linux_riscv.hpp | 31 +++++++---- - 2 files changed, 39 insertions(+), 43 deletions(-) - -diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -index 761da5d743e..9b8b1a31774 100644 ---- a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -+++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -@@ -33,25 +33,31 @@ - // Note that memory_order_conservative requires a full barrier after atomic stores. 
- // See https://patchwork.kernel.org/patch/3575821/ - -+#define FULL_MEM_BARRIER __sync_synchronize() -+#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); -+#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); -+ - template --struct Atomic::PlatformAdd { -- template -- D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { -+struct Atomic::PlatformAdd -+ : Atomic::FetchAndAdd > -+{ -+ template -+ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { - D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE); - FULL_MEM_BARRIER; - return res; - } - -- template -- D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const { -- return add_and_fetch(dest, add_value, order) - add_value; -+ template -+ D fetch_and_add(I add_value, D volatile* dest, atomic_memory_order order) const { -+ return add_and_fetch(add_value, dest, order) - add_value; - } - }; - - template - template --inline T Atomic::PlatformXchg::operator()(T volatile* dest, -- T exchange_value, -+inline T Atomic::PlatformXchg::operator()(T exchange_value, -+ T volatile* dest, - atomic_memory_order order) const { - STATIC_ASSERT(byte_size == sizeof(T)); - T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE); -@@ -62,9 +68,9 @@ inline T Atomic::PlatformXchg::operator()(T volatile* dest, - // __attribute__((unused)) on dest is to get rid of spurious GCC warnings. - template - template --inline T Atomic::PlatformCmpxchg::operator()(T volatile* dest __attribute__((unused)), -+inline T Atomic::PlatformCmpxchg::operator()(T exchange_value, -+ T volatile* dest __attribute__((unused)), - T compare_value, -- T exchange_value, - atomic_memory_order order) const { - STATIC_ASSERT(byte_size == sizeof(T)); - T value = compare_value; -@@ -83,9 +89,9 @@ inline T Atomic::PlatformCmpxchg::operator()(T volatile* dest __attri - - template<> - template --inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__((unused)), -+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, -+ T volatile* dest __attribute__((unused)), - T compare_value, -- T exchange_value, - atomic_memory_order order) const { - STATIC_ASSERT(4 == sizeof(T)); - if (order != memory_order_relaxed) { -@@ -110,25 +116,4 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__(( - return rv; - } - --template --struct Atomic::PlatformOrderedLoad --{ -- template -- T operator()(const volatile T* p) const { T data; __atomic_load(const_cast(p), &data, __ATOMIC_ACQUIRE); return data; } --}; -- --template --struct Atomic::PlatformOrderedStore --{ -- template -- void operator()(volatile T* p, T v) const { __atomic_store(const_cast(p), &v, __ATOMIC_RELEASE); } --}; -- --template --struct Atomic::PlatformOrderedStore --{ -- template -- void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); } --}; -- - #endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -index 1c33dc1e87f..5b5d35553f7 100644 ---- a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -+++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -@@ -37,10 +37,6 @@ inline void OrderAccess::storestore() { release(); } - inline void OrderAccess::loadstore() { acquire(); } - inline void OrderAccess::storeload() { fence(); } - --#define FULL_MEM_BARRIER __sync_synchronize() --#define 
READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); --#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); -- - inline void OrderAccess::acquire() { - READ_MEM_BARRIER; - } -@@ -53,11 +49,26 @@ inline void OrderAccess::fence() { - FULL_MEM_BARRIER; - } - --inline void OrderAccess::cross_modify_fence_impl() { -- asm volatile("fence.i" : : : "memory"); -- if (UseConservativeFence) { -- asm volatile("fence ir, ir" : : : "memory"); -- } --} -+ -+template -+struct OrderAccess::PlatformOrderedLoad -+{ -+ template -+ T operator()(const volatile T* p) const { T data; __atomic_load(const_cast(p), &data, __ATOMIC_ACQUIRE); return data; } -+}; -+ -+template -+struct OrderAccess::PlatformOrderedStore -+{ -+ template -+ void operator()(T v, volatile T* p) const { __atomic_store(const_cast(p), &v, __ATOMIC_RELEASE); } -+}; -+ -+template -+struct OrderAccess::PlatformOrderedStore -+{ -+ template -+ void operator()(T v, volatile T* p) const { release_store(p, v); OrderAccess::fence(); } -+}; - - #endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP - -From b078a2ec01598fbcd99aea61af15d44f9c884aaa Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 21:07:42 +0800 -Subject: [PATCH 009/140] Revert JDK-8229258: Rework markOop and markOopDesc - into a simpler mark word value carrier - ---- - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 4 ++-- - .../shenandoahBarrierSetAssembler_riscv.cpp | 4 ++-- - src/hotspot/cpu/riscv/riscv.ad | 22 +++++++++---------- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 2 +- - 4 files changed, 16 insertions(+), 16 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index e5ed25616d6..2d52343587e 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -67,7 +67,7 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr - // Load object header - ld(hdr, Address(obj, hdr_offset)); - // and mark it as unlocked -- ori(hdr, hdr, markWord::unlocked_value); -+ ori(hdr, hdr, markOopDesc::unlocked_value); - // save unlocked object header into the displaced header location on the stack - sd(hdr, Address(disp_hdr, 0)); - // test if object header is still the same (i.e. 
unlocked), and if so, store the -@@ -141,7 +141,7 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i - void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { - assert_different_registers(obj, klass, len); - // This assumes that all prototype bits fitr in an int32_t -- mv(tmp1, (int32_t)(intptr_t)markWord::prototype().value()); -+ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); - sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); - - if (UseCompressedClassPointers) { // Take care not to kill klass -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -index d0ac6e52436..84e1205bc25 100644 ---- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -@@ -216,9 +216,9 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb - Label done; - __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); - __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 -- __ andi(t2, tmp, markWord::lock_mask_in_place); -+ __ andi(t2, tmp, markOopDesc::lock_mask_in_place); - __ bnez(t2, done); -- __ ori(tmp, tmp, markWord::marked_value); -+ __ ori(tmp, tmp, markOopDesc::marked_value); - __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 - __ bind(done); - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 2eefc71dde0..44ab44dece1 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1966,12 +1966,12 @@ encode %{ - __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); - - // Check for existing monitor -- __ andi(t0, disp_hdr, markWord::monitor_value); -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); - __ bnez(t0, object_has_monitor); - - if (!UseHeavyMonitors) { - // Set tmp to be (markWord of object | UNLOCK_VALUE). -- __ ori(tmp, disp_hdr, markWord::unlocked_value); -+ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); - - // Initialize the box. (Must happen before we update the object mark!) - __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -@@ -1993,7 +1993,7 @@ encode %{ - // Check if the owner is self by comparing the value in the - // markWord of object (disp_hdr) with the stack pointer. - __ sub(disp_hdr, disp_hdr, sp); -- __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place)); -+ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); - // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, - // hence we can store 0 as the displaced header in the box, which indicates that it is a - // recursive lock. -@@ -2012,15 +2012,15 @@ encode %{ - // otherwise m->owner may contain a thread or a stack address. - // - // Try to CAS m->owner from NULL to current thread. 
-- __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markWord::monitor_value)); -+ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); - __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, - Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) - - // Store a non-null value into the box to avoid looking like a re-entrant - // lock. The fast-path monitor unlock code checks for -- // markWord::monitor_value so use markWord::unused_mark which has the -+ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the - // relevant bit set, and also matches ObjectSynchronizer::slow_enter. -- __ mv(tmp, (address)markWord::unused_mark().value()); -+ __ mv(tmp, (address)markOopDesc::unused_mark()); - __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); - - __ beqz(flag, cont); // CAS success means locking succeeded -@@ -2029,9 +2029,9 @@ encode %{ - - // Recursive lock case - __ mv(flag, zr); -- __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); -+ __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); - __ add(tmp, tmp, 1u); -- __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); -+ __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); - - __ bind(cont); - %} -@@ -2060,7 +2060,7 @@ encode %{ - - // Handle existing monitor. - __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); -- __ andi(t0, disp_hdr, markWord::monitor_value); -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); - __ bnez(t0, object_has_monitor); - - if (!UseHeavyMonitors) { -@@ -2080,8 +2080,8 @@ encode %{ - - // Handle existing monitor. - __ bind(object_has_monitor); -- STATIC_ASSERT(markWord::monitor_value <= INT_MAX); -- __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor -+ STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); -+ __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor - __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); - - Label notRecursive; -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index d2a301c6e74..4e388ac4eaa 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -3559,7 +3559,7 @@ void TemplateTable::_new() { - - // initialize object hader only. 
- __ bind(initialize_header); -- __ mv(t0, (intptr_t)markWord::prototype().value()); -+ __ mv(t0, (intptr_t)markOopDesc::prototype()); - __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); - __ store_klass_gap(x10, zr); // zero klass gap for compressed oops - __ store_klass(x10, x14); // store klass last - -From 4b27cd8d4cfa8fb5f0f78aecaebb17d19362f300 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Fri, 31 Mar 2023 16:24:36 +0800 -Subject: [PATCH 010/140] Revert: JDK-8239895: assert(_stack_base != 0LL) - failed: Sanity check JDK-8238988: Rename thread "in stack" methods and add - in_stack_range JDK-8234372: Investigate use of Thread::stack_base() and - queries for "in stack" JDK-8203481: Incorrect constraint for unextended_sp in - frame:safe_for_sender - ---- - src/hotspot/cpu/riscv/frame_riscv.cpp | 32 +++++++++++++++++++-------- - 1 file changed, 23 insertions(+), 9 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 41e52a4d491..8e7babe2c61 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -56,13 +56,21 @@ void RegisterMap::check_location_valid() { - // Profiling/safepoint support - - bool frame::safe_for_sender(JavaThread *thread) { -- address addr_sp = (address)_sp; -- address addr_fp = (address)_fp; -+ address sp = (address)_sp; -+ address fp = (address)_fp; - address unextended_sp = (address)_unextended_sp; - - // consider stack guards when trying to determine "safe" stack pointers -+ static size_t stack_guard_size = os::uses_stack_guard_pages() ? -+ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; -+ size_t usable_stack_size = thread->stack_size() - stack_guard_size; -+ - // sp must be within the usable part of the stack (not in guards) -- if (!thread->is_in_usable_stack(addr_sp)) { -+ bool sp_safe = (sp < thread->stack_base()) && -+ (sp >= thread->stack_base() - usable_stack_size); -+ -+ -+ if (!sp_safe) { - return false; - } - -@@ -79,14 +87,15 @@ bool frame::safe_for_sender(JavaThread *thread) { - // So unextended sp must be within the stack but we need not to check - // that unextended sp >= sp - -- if (!thread->is_in_full_stack_checked(unextended_sp)) { -+ bool unextended_sp_safe = (unextended_sp < thread->stack_base()); -+ -+ if (!unextended_sp_safe) { - return false; - } - - // an fp must be within the stack and above (but not equal) sp - // second evaluation on fp+ is added to handle situation where fp is -1 -- bool fp_safe = thread->is_in_stack_range_excl(addr_fp, addr_sp) && -- thread->is_in_full_stack_checked(addr_fp + (return_addr_offset * sizeof(void*))); -+ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); - - // We know sp/unextended_sp are safe only fp is questionable here - -@@ -147,7 +156,7 @@ bool frame::safe_for_sender(JavaThread *thread) { - - sender_sp = _unextended_sp + _cb->frame_size(); - // Is sender_sp safe? -- if (!thread->is_in_full_stack_checked((address)sender_sp)) { -+ if ((address)sender_sp >= thread->stack_base()) { - return false; - } - -@@ -163,7 +172,10 @@ bool frame::safe_for_sender(JavaThread *thread) { - // fp is always saved in a recognizable place in any code we generate. However - // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp - // is really a frame pointer. 
-- if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { -+ -+ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); -+ -+ if (!saved_fp_safe) { - return false; - } - -@@ -196,7 +208,9 @@ bool frame::safe_for_sender(JavaThread *thread) { - - // Could be the call_stub - if (StubRoutines::returns_to_call_stub(sender_pc)) { -- if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { -+ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); -+ -+ if (!saved_fp_safe) { - return false; - } - - -From d1b463b6c00c75664a49719f75bef8e6408f12df Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Fri, 31 Mar 2023 17:10:33 +0800 -Subject: [PATCH 011/140] Revert JDK-8173585: Intrinsify - StringLatin1.indexOf(char) - ---- - src/hotspot/cpu/riscv/riscv.ad | 19 ------------------- - 1 file changed, 19 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 44ab44dece1..8c7a8ede815 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -9826,7 +9826,6 @@ instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, - iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) - %{ - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -- predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); - effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); - -@@ -9840,24 +9839,6 @@ instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, - %} - - --instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -- iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -- iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) --%{ -- match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -- predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); -- effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, -- TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); -- -- format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} -- ins_encode %{ -- __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -- $result$$Register, $tmp1$$Register, $tmp2$$Register, -- $tmp3$$Register, $tmp4$$Register, true /* isL */); -- %} -- ins_pipe(pipe_class_memory); --%} -- - // clearing of an array - instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) - %{ - -From a0cdf8dfb05dbff34d2ca23104d08ae21b2d7f70 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 12:25:36 +0800 -Subject: [PATCH 012/140] Revert JDK-8281632: riscv: Improve interpreter stack - banging, and change the register t1->t0 - ---- - .../templateInterpreterGenerator_riscv.cpp | 42 ++++--------------- - 1 file changed, 8 insertions(+), 34 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index 6537b2dbd94..76ae6f89e27 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -895,42 +895,16 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract - } - - void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { -- // See more discussion in stackOverflow.hpp. 
-- -- const int shadow_zone_size = checked_cast(StackOverflow::stack_shadow_zone_size()); -+ // Bang each page in the shadow zone. We can't assume it's been done for -+ // an interpreter frame with greater than a page of locals, so each page -+ // needs to be checked. Only true for non-native. -+ const int n_shadow_pages = JavaThread::stack_shadow_zone_size() / os::vm_page_size(); -+ const int start_page = native_call ? n_shadow_pages : 1; - const int page_size = os::vm_page_size(); -- const int n_shadow_pages = shadow_zone_size / page_size; -- --#ifdef ASSERT -- Label L_good_limit; -- __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); -- __ bnez(t0, L_good_limit); -- __ stop("shadow zone safe limit is not initialized"); -- __ bind(L_good_limit); -- -- Label L_good_watermark; -- __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); -- __ bnez(t0, L_good_watermark); -- __ stop("shadow zone growth watermark is not initialized"); -- __ bind(L_good_watermark); --#endif -- -- Label L_done; -- -- __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); -- __ bgtu(sp, t0, L_done); -- -- for (int p = 1; p <= n_shadow_pages; p++) { -- __ bang_stack_with_offset(p * page_size); -+ for (int pages = start_page; pages <= n_shadow_pages ; pages++) { -+ __ sub(t0, sp, pages * page_size); -+ __ sd(zr, Address(t0)); - } -- -- // Record the new watermark, but only if the update is above the safe limit. -- // Otherwise, the next time around the check above would pass the safe limit. -- __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); -- __ bleu(sp, t0, L_done); -- __ sd(sp, Address(xthread, JavaThread::shadow_zone_growth_watermark())); -- -- __ bind(L_done); - } - - // Interpreter stub for calling a native method. (asm interpreter) - -From 8db4bf1400d92c80a0adef8a5ec12adbf595c03f Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 14:56:25 +0800 -Subject: [PATCH 013/140] Port aarch64 style sig handler from - os_linux_aarch64.cpp - ---- - .../os_cpu/linux_riscv/os_linux_riscv.cpp | 224 +++++++++++++----- - 1 file changed, 168 insertions(+), 56 deletions(-) - -diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -index 1f46bbab0a2..db15f1946e2 100644 ---- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -@@ -48,7 +48,6 @@ - #include "runtime/stubRoutines.hpp" - #include "runtime/thread.inline.hpp" - #include "runtime/timer.hpp" --#include "signals_posix.hpp" - #include "utilities/debug.hpp" - #include "utilities/events.hpp" - #include "utilities/vmError.hpp" -@@ -172,31 +171,138 @@ NOINLINE frame os::current_frame() { - } - - // Utility functions --bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, -- ucontext_t* uc, JavaThread* thread) { -+extern "C" JNIEXPORT int -+JVM_handle_linux_signal(int sig, -+ siginfo_t* info, -+ void* ucVoid, -+ int abort_if_unrecognized) { -+ ucontext_t* uc = (ucontext_t*) ucVoid; -+ -+ Thread* t = Thread::current_or_null_safe(); -+ -+ // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away -+ // (no destructors can be run) -+ os::ThreadCrashProtection::check_crash_protection(sig, t); -+ -+ SignalHandlerMark shm(t); -+ -+ // Note: it's not uncommon that JNI code uses signal/sigset to install -+ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, -+ // or have a SIGILL handler when detecting CPU type). 
When that happens, -+ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To -+ // avoid unnecessary crash when libjsig is not preloaded, try handle signals -+ // that do not require siginfo/ucontext first. -+ -+ if (sig == SIGPIPE || sig == SIGXFSZ) { -+ // allow chained handler to go first -+ if (os::Linux::chained_handler(sig, info, ucVoid)) { -+ return true; -+ } else { -+ // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219 -+ return true; -+ } -+ } -+ -+#ifdef CAN_SHOW_REGISTERS_ON_ASSERT -+ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { -+ if (handle_assert_poison_fault(ucVoid, info->si_addr)) { -+ return 1; -+ } -+ } -+#endif -+ -+ JavaThread* thread = NULL; -+ VMThread* vmthread = NULL; -+ if (os::Linux::signal_handlers_are_installed) { -+ if (t != NULL ){ -+ if(t->is_Java_thread()) { -+ thread = (JavaThread *) t; -+ } -+ else if(t->is_VM_thread()){ -+ vmthread = (VMThread *)t; -+ } -+ } -+ } -+ -+ // Handle SafeFetch faults -+ if ((sig == SIGSEGV || sig == SIGBUS) && uc != NULL) { -+ address const pc = (address) os::Linux::ucontext_get_pc(uc); -+ if (pc && StubRoutines::is_safefetch_fault(pc)) { -+ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); -+ return 1; -+ } -+ } - - // decide if this trap can be handled by a stub - address stub = NULL; - -- address pc = NULL; -+ address pc = NULL; - - //%note os_trap_1 - if (info != NULL && uc != NULL && thread != NULL) { -- pc = (address) os::Posix::ucontext_get_pc(uc); -- -- address addr = (address) info->si_addr; -- -- // Make sure the high order byte is sign extended, as it may be masked away by the hardware. -- if ((uintptr_t(addr) & (uintptr_t(1) << 55)) != 0) { -- addr = address(uintptr_t(addr) | (uintptr_t(0xFF) << 56)); -- } -+ pc = (address) os::Linux::ucontext_get_pc(uc); - - // Handle ALL stack overflow variations here - if (sig == SIGSEGV) { -+ address addr = (address) info->si_addr; -+ - // check if fault address is within thread stack -- if (thread->is_in_full_stack(addr)) { -- if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) { -- return true; // continue -+ if (thread->on_local_stack(addr)) { -+ // stack overflow -+ if (thread->in_stack_yellow_reserved_zone(addr)) { -+ if (thread->thread_state() == _thread_in_Java) { -+ if (thread->in_stack_reserved_zone(addr)) { -+ frame fr; -+ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { -+ assert(fr.is_java_frame(), "Must be a Java frame"); -+ frame activation = -+ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); -+ if (activation.sp() != NULL) { -+ thread->disable_stack_reserved_zone(); -+ if (activation.is_interpreted_frame()) { -+ thread->set_reserved_stack_activation((address)( -+ activation.fp() + frame::interpreter_frame_initial_sp_offset)); -+ } else { -+ thread->set_reserved_stack_activation((address)activation.unextended_sp()); -+ } -+ return 1; -+ } -+ } -+ } -+ // Throw a stack overflow exception. Guard pages will be reenabled -+ // while unwinding the stack. -+ thread->disable_stack_yellow_reserved_zone(); -+ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); -+ } else { -+ // Thread was in the vm or native code. Return and try to finish. -+ thread->disable_stack_yellow_reserved_zone(); -+ return 1; -+ } -+ } else if (thread->in_stack_red_zone(addr)) { -+ // Fatal red zone violation. 
Disable the guard pages and fall through -+ // to handle_unexpected_exception way down below. -+ thread->disable_stack_red_zone(); -+ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); -+ -+ // This is a likely cause, but hard to verify. Let's just print -+ // it as a hint. -+ tty->print_raw_cr("Please check if any of your loaded .so files has " -+ "enabled executable stack (see man page execstack(8))"); -+ } else { -+ // Accessing stack address below sp may cause SEGV if current -+ // thread has MAP_GROWSDOWN stack. This should only happen when -+ // current thread was created by user code with MAP_GROWSDOWN flag -+ // and then attached to VM. See notes in os_linux.cpp. -+ if (thread->osthread()->expanding_stack() == 0) { -+ thread->osthread()->set_expanding_stack(); -+ if (os::Linux::manually_expand_stack(thread, addr)) { -+ thread->osthread()->clear_expanding_stack(); -+ return 1; -+ } -+ thread->osthread()->clear_expanding_stack(); -+ } else { -+ fatal("recursive segv. expanding stack."); -+ } - } - } - } -@@ -212,7 +318,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, - tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); - } - stub = SharedRuntime::get_handle_wrong_method_stub(); -- } else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { -+ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { - stub = SharedRuntime::get_poll_stub(pc); - } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { - // BugId 4454115: A read from a MappedByteBuffer can fault -@@ -220,34 +326,12 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, - // Do not crash the VM in such a case. - CodeBlob* cb = CodeCache::find_blob_unsafe(pc); - CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; -- bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); -- if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { -+ if (nm != NULL && nm->has_unsafe_access()) { - address next_pc = pc + NativeCall::instruction_size; -- if (is_unsafe_arraycopy) { -- next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); -- } - stub = SharedRuntime::handle_unsafe_access(thread, next_pc); - } -- } else if (sig == SIGILL && nativeInstruction_at(pc)->is_stop()) { -- // Pull a pointer to the error message out of the instruction -- // stream. -- const uint64_t *detail_msg_ptr -- = (uint64_t*)(pc + NativeInstruction::instruction_size); -- const char *detail_msg = (const char *)*detail_msg_ptr; -- const char *msg = "stop"; -- if (TraceTraps) { -- tty->print_cr("trap: %s: (SIGILL)", msg); -- } -- -- // End life with a fatal error, message and detail message and the context. -- // Note: no need to do any post-processing here (e.g. 
signal chaining) -- va_list va_dummy; -- VMError::report_and_die(thread, uc, NULL, 0, msg, detail_msg, va_dummy); -- va_end(va_dummy); -- -- ShouldNotReachHere(); - } else if (sig == SIGFPE && -- (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { -+ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { - stub = - SharedRuntime:: - continuation_for_implicit_exception(thread, -@@ -255,42 +339,70 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, - SharedRuntime:: - IMPLICIT_DIVIDE_BY_ZERO); - } else if (sig == SIGSEGV && -- MacroAssembler::uses_implicit_null_check((void*)addr)) { -+ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { - // Determination of interpreter/vtable stub/compiled code null exception - stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); - } -- } else if ((thread->thread_state() == _thread_in_vm || -- thread->thread_state() == _thread_in_native) && -- sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ -- thread->doing_unsafe_access()) { -+ } else if (thread->thread_state() == _thread_in_vm && -+ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ -+ thread->doing_unsafe_access()) { - address next_pc = pc + NativeCall::instruction_size; -- if (UnsafeCopyMemory::contains_pc(pc)) { -- next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); -- } - stub = SharedRuntime::handle_unsafe_access(thread, next_pc); - } - - // jni_fast_GetField can trap at certain pc's if a GC kicks in - // and the heap gets shrunk before the field access. - if ((sig == SIGSEGV) || (sig == SIGBUS)) { -- address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); -- if (addr_slow != (address)-1) { -- stub = addr_slow; -+ address addr = JNI_FastGetField::find_slowcase_pc(pc); -+ if (addr != (address)-1) { -+ stub = addr; - } - } -+ -+ // Check to see if we caught the safepoint code in the -+ // process of write protecting the memory serialization page. -+ // It write enables the page immediately after protecting it -+ // so we can just return to retry the write. -+ if ((sig == SIGSEGV) && -+ os::is_memory_serialize_page(thread, (address) info->si_addr)) { -+ // Block current thread until the memory serialize page permission restored. 
-+ os::block_on_serialize_page_trap(); -+ return true; -+ } - } - - if (stub != NULL) { - // save all thread context in case we need to restore it -- if (thread != NULL) { -- thread->set_saved_exception_pc(pc); -- } -+ if (thread != NULL) thread->set_saved_exception_pc(pc); - -- os::Posix::ucontext_set_pc(uc, stub); -+ os::Linux::ucontext_set_pc(uc, stub); - return true; - } - -- return false; // Mute compiler -+ // signal-chaining -+ if (os::Linux::chained_handler(sig, info, ucVoid)) { -+ return true; -+ } -+ -+ if (!abort_if_unrecognized) { -+ // caller wants another chance, so give it to him -+ return false; -+ } -+ -+ if (pc == NULL && uc != NULL) { -+ pc = os::Linux::ucontext_get_pc(uc); -+ } -+ -+ // unmask current signal -+ sigset_t newset; -+ sigemptyset(&newset); -+ sigaddset(&newset, sig); -+ sigprocmask(SIG_UNBLOCK, &newset, NULL); -+ -+ VMError::report_and_die(t, sig, pc, info, ucVoid); -+ -+ ShouldNotReachHere(); -+ return true; // Mute compiler - } - - void os::Linux::init_thread_fpu_state(void) { - -From fd3897410308e2fc54d84a9bd453b1b375e6aace Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 15:24:57 +0800 -Subject: [PATCH 014/140] Revert: JDK-8248240: Remove extendedPC.hpp and - fetch_frame_from_ucontext JDK-8253742: POSIX signal code cleanup - ---- - .../os_cpu/linux_riscv/os_linux_riscv.cpp | 38 ++++++++++++++----- - .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 9 +++-- - 2 files changed, 33 insertions(+), 14 deletions(-) - -diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -index db15f1946e2..4f1c84c60a0 100644 ---- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -@@ -37,6 +37,7 @@ - #include "prims/jniFastGetField.hpp" - #include "prims/jvm_misc.hpp" - #include "runtime/arguments.hpp" -+#include "runtime/extendedPC.hpp" - #include "runtime/frame.inline.hpp" - #include "runtime/interfaceSupport.inline.hpp" - #include "runtime/java.hpp" -@@ -85,11 +86,11 @@ char* os::non_memory_address_word() { - return (char*) -1; - } - --address os::Posix::ucontext_get_pc(const ucontext_t * uc) { -+address os::Linux::ucontext_get_pc(const ucontext_t * uc) { - return (address)uc->uc_mcontext.__gregs[REG_PC]; - } - --void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) { -+void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { - uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc; - } - -@@ -101,13 +102,29 @@ intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { - return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; - } - --address os::fetch_frame_from_context(const void* ucVoid, -- intptr_t** ret_sp, intptr_t** ret_fp) { -- address epc; -+// For Forte Analyzer AsyncGetCallTrace profiling support - thread -+// is currently interrupted by SIGPROF. -+// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal -+// frames. Currently we don't do that on Linux, so it's the same as -+// os::fetch_frame_from_context(). 
-+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, -+ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { -+ -+ assert(thread != NULL, "just checking"); -+ assert(ret_sp != NULL, "just checking"); -+ assert(ret_fp != NULL, "just checking"); -+ -+ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); -+} -+ -+ExtendedPC os::fetch_frame_from_context(const void* ucVoid, -+ intptr_t** ret_sp, intptr_t** ret_fp) { -+ -+ ExtendedPC epc; - const ucontext_t* uc = (const ucontext_t*)ucVoid; - - if (uc != NULL) { -- epc = os::Posix::ucontext_get_pc(uc); -+ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); - if (ret_sp != NULL) { - *ret_sp = os::Linux::ucontext_get_sp(uc); - } -@@ -115,7 +132,8 @@ address os::fetch_frame_from_context(const void* ucVoid, - *ret_fp = os::Linux::ucontext_get_fp(uc); - } - } else { -- epc = NULL; -+ // construct empty ExtendedPC for return value checking -+ epc = ExtendedPC(NULL); - if (ret_sp != NULL) { - *ret_sp = (intptr_t *)NULL; - } -@@ -142,8 +160,8 @@ frame os::fetch_compiled_frame_from_context(const void* ucVoid) { - frame os::fetch_frame_from_context(const void* ucVoid) { - intptr_t* frame_sp = NULL; - intptr_t* frame_fp = NULL; -- address epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); -- return frame(frame_sp, frame_fp, epc); -+ ExtendedPC epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); -+ return frame(frame_sp, frame_fp, epc.pc()); - } - - // By default, gcc always saves frame pointer rfp on this stack. This -@@ -465,7 +483,7 @@ void os::print_context(outputStream *st, const void *context) { - // Note: it may be unsafe to inspect memory near pc. For example, pc may - // point to garbage if entry point in an nmethod is corrupted. Leave - // this at the end, and hope for the best. 
-- address pc = os::Posix::ucontext_get_pc(uc); -+ address pc = os::Linux::ucontext_get_pc(uc); - print_instructions(st, pc, sizeof(char)); - st->cr(); - } -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -index 3100572e9fd..e46efc420b0 100644 ---- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -@@ -61,16 +61,17 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) - - intptr_t* ret_fp = NULL; - intptr_t* ret_sp = NULL; -- address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp); -- if (addr == NULL || ret_sp == NULL ) { -+ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, -+ &ret_sp, &ret_fp); -+ if (addr.pc() == NULL || ret_sp == NULL ) { - // ucontext wasn't useful - return false; - } - -- frame ret_frame(ret_sp, ret_fp, addr); -+ frame ret_frame(ret_sp, ret_fp, addr.pc()); - if (!ret_frame.safe_for_sender(this)) { - #ifdef COMPILER2 -- frame ret_frame2(ret_sp, NULL, addr); -+ frame ret_frame2(ret_sp, NULL, addr.pc()); - if (!ret_frame2.safe_for_sender(this)) { - // nothing else to try if the frame isn't good - return false; - -From 892b40a435ae3f7e85659100ef68db1aeda7ef23 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 15:33:50 +0800 -Subject: [PATCH 015/140] Revert JDK-8263002: Remove CDS MiscCode region - ---- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 10 ++++++++++ - src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp | 6 ++++++ - 2 files changed, 16 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 4daed17df10..21aa3b58c09 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -187,6 +187,16 @@ bool SharedRuntime::is_wide_vector(int size) { - return false; - } - -+size_t SharedRuntime::trampoline_size() { -+ return 6 * NativeInstruction::instruction_size; -+} -+ -+void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, destination, offset); -+ __ jalr(x0, t0, offset); -+} -+ - // The java_calling_convention describes stack locations as ideal slots on - // a frame with no abi restrictions. Since we must observe abi restrictions - // (like the placement of the register window) the slots must be biased by -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -index e46efc420b0..31d9254d8ad 100644 ---- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -@@ -68,6 +68,12 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) - return false; - } - -+ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { -+ // In the middle of a trampoline call. Bail out for safety. -+ // This happens rarely so shouldn't affect profiling. 
-+ return false; -+ } -+ - frame ret_frame(ret_sp, ret_fp, addr.pc()); - if (!ret_frame.safe_for_sender(this)) { - #ifdef COMPILER2 - -From 945a317797bc96efe3f0717ca7258f081b96b14d Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 15:52:43 +0800 -Subject: [PATCH 016/140] Revert JDK-8254158: Consolidate per-platform stack - overflow handling code - ---- - .../os_cpu/linux_riscv/os_linux_riscv.cpp | 52 ++++++++++++++----- - 1 file changed, 40 insertions(+), 12 deletions(-) - -diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -index 4f1c84c60a0..8b772892b4b 100644 ---- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -@@ -145,18 +145,6 @@ ExtendedPC os::fetch_frame_from_context(const void* ucVoid, - return epc; - } - --frame os::fetch_compiled_frame_from_context(const void* ucVoid) { -- const ucontext_t* uc = (const ucontext_t*)ucVoid; -- // In compiled code, the stack banging is performed before RA -- // has been saved in the frame. RA is live, and SP and FP -- // belong to the caller. -- intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); -- intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); -- address frame_pc = (address)(uc->uc_mcontext.__gregs[REG_LR] -- - NativeInstruction::instruction_size); -- return frame(frame_sp, frame_fp, frame_pc); --} -- - frame os::fetch_frame_from_context(const void* ucVoid) { - intptr_t* frame_sp = NULL; - intptr_t* frame_fp = NULL; -@@ -164,6 +152,46 @@ frame os::fetch_frame_from_context(const void* ucVoid) { - return frame(frame_sp, frame_fp, epc.pc()); - } - -+bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { -+ address pc = (address) os::Linux::ucontext_get_pc(uc); -+ if (Interpreter::contains(pc)) { -+ // interpreter performs stack banging after the fixed frame header has -+ // been generated while the compilers perform it before. To maintain -+ // semantic consistency between interpreted and compiled frames, the -+ // method returns the Java sender of the current frame. -+ *fr = os::fetch_frame_from_context(uc); -+ if (!fr->is_first_java_frame()) { -+ assert(fr->safe_for_sender(thread), "Safety check"); -+ *fr = fr->java_sender(); -+ } -+ } else { -+ // more complex code with compiled code -+ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); -+ CodeBlob* cb = CodeCache::find_blob(pc); -+ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { -+ // Not sure where the pc points to, fallback to default -+ // stack overflow handling -+ return false; -+ } else { -+ // In compiled code, the stack banging is performed before RA -+ // has been saved in the frame. RA is live, and SP and FP -+ // belong to the caller. -+ intptr_t* fp = os::Linux::ucontext_get_fp(uc); -+ intptr_t* sp = os::Linux::ucontext_get_sp(uc); -+ address pc = (address)(uc->uc_mcontext.__gregs[REG_LR] -+ - NativeInstruction::instruction_size); -+ *fr = frame(sp, fp, pc); -+ if (!fr->is_java_frame()) { -+ assert(fr->safe_for_sender(thread), "Safety check"); -+ assert(!fr->is_first_frame(), "Safety check"); -+ *fr = fr->java_sender(); -+ } -+ } -+ } -+ assert(fr->is_java_frame(), "Safety check"); -+ return true; -+} -+ - // By default, gcc always saves frame pointer rfp on this stack. This - // may get turned off by -fomit-frame-pointer. 
- frame os::get_sender_for_C_frame(frame* fr) { - -From c1a03e0a376cc2c8748d83d66b576b66ee2e6962 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 16:14:19 +0800 -Subject: [PATCH 017/140] Revert JDK-8202579: Revisit VM_Version and - VM_Version_ext for overlap and consolidation - ---- - .../cpu/riscv/vm_version_ext_riscv.cpp | 87 +++++++++++++++++++ - .../cpu/riscv/vm_version_ext_riscv.hpp | 55 ++++++++++++ - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 14 --- - 3 files changed, 142 insertions(+), 14 deletions(-) - create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp - -diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp -new file mode 100644 -index 00000000000..6bdce51506e ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp -@@ -0,0 +1,87 @@ -+/* -+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "memory/allocation.hpp" -+#include "memory/allocation.inline.hpp" -+#include "runtime/os.inline.hpp" -+#include "vm_version_ext_riscv.hpp" -+ -+// VM_Version_Ext statics -+int VM_Version_Ext::_no_of_threads = 0; -+int VM_Version_Ext::_no_of_cores = 0; -+int VM_Version_Ext::_no_of_sockets = 0; -+bool VM_Version_Ext::_initialized = false; -+char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; -+char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; -+ -+void VM_Version_Ext::initialize_cpu_information(void) { -+ // do nothing if cpu info has been initialized -+ if (_initialized) { -+ return; -+ } -+ -+ _no_of_cores = os::processor_count(); -+ _no_of_threads = _no_of_cores; -+ _no_of_sockets = _no_of_cores; -+ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); -+ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); -+ _initialized = true; -+} -+ -+int VM_Version_Ext::number_of_threads(void) { -+ initialize_cpu_information(); -+ return _no_of_threads; -+} -+ -+int VM_Version_Ext::number_of_cores(void) { -+ initialize_cpu_information(); -+ return _no_of_cores; -+} -+ -+int VM_Version_Ext::number_of_sockets(void) { -+ initialize_cpu_information(); -+ return _no_of_sockets; -+} -+ -+const char* VM_Version_Ext::cpu_name(void) { -+ initialize_cpu_information(); -+ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); -+ if (NULL == tmp) { -+ return NULL; -+ } -+ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); -+ return tmp; -+} -+ -+const char* VM_Version_Ext::cpu_description(void) { -+ initialize_cpu_information(); -+ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); -+ if (NULL == tmp) { -+ return NULL; -+ } -+ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); -+ return tmp; -+} -diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp -new file mode 100644 -index 00000000000..711e4aeaf68 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#ifndef CPU_RISCV_VM_VERSION_EXT_RISCV_HPP -+#define CPU_RISCV_VM_VERSION_EXT_RISCV_HPP -+ -+#include "runtime/vm_version.hpp" -+#include "utilities/macros.hpp" -+ -+class VM_Version_Ext : public VM_Version { -+ private: -+ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; -+ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; -+ -+ static int _no_of_threads; -+ static int _no_of_cores; -+ static int _no_of_sockets; -+ static bool _initialized; -+ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; -+ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; -+ -+ public: -+ static int number_of_threads(void); -+ static int number_of_cores(void); -+ static int number_of_sockets(void); -+ -+ static const char* cpu_name(void); -+ static const char* cpu_description(void); -+ static void initialize_cpu_information(void); -+ -+}; -+ -+#endif // CPU_RISCV_VM_VERSION_EXT_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -index 2c15a834542..dd65f32277f 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -210,17 +210,3 @@ void VM_Version::c2_initialize() { - } - } - #endif // COMPILER2 -- --void VM_Version::initialize_cpu_information(void) { -- // do nothing if cpu info has been initialized -- if (_initialized) { -- return; -- } -- -- _no_of_cores = os::processor_count(); -- _no_of_threads = _no_of_cores; -- _no_of_sockets = _no_of_cores; -- snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); -- snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); -- _initialized = true; --} - -From 0cfdbd8595c710b71be008bb531b59acf9c4b016 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 17:16:05 +0800 -Subject: [PATCH 018/140] Revert JDK-8191278: MappedByteBuffer bulk access - memory failures are not handled gracefully - ---- - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 19 ++----------------- - 1 file changed, 2 insertions(+), 17 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -index 39416441bdf..8392b768847 100644 ---- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -1049,12 +1049,7 @@ class StubGenerator: public StubCodeGenerator { - __ push_reg(RegSet::of(d, count), sp); - } - -- { -- // UnsafeCopyMemory page error: continue after ucm -- bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); -- UnsafeCopyMemoryMark ucmm(this, add_entry, true); -- copy_memory(aligned, s, d, count, t0, size); -- } -+ copy_memory(aligned, s, d, count, t0, size); - - if (is_oop) { - __ pop_reg(RegSet::of(d, count), sp); -@@ -1122,12 +1117,7 @@ class StubGenerator: public StubCodeGenerator { - __ push_reg(RegSet::of(d, count), sp); - } - -- { -- // UnsafeCopyMemory page error: continue after ucm -- bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); -- UnsafeCopyMemoryMark ucmm(this, add_entry, true); -- copy_memory(aligned, s, d, count, t0, -size); -- } -+ copy_memory(aligned, s, d, count, t0, -size); - - if (is_oop) { - __ pop_reg(RegSet::of(d, count), sp); -@@ -3734,11 +3724,6 @@ class StubGenerator: public StubCodeGenerator { - ~StubGenerator() {} - }; // end class declaration - --#define UCM_TABLE_MAX_ENTRIES 8 - void StubGenerator_generate(CodeBuffer* code, bool all) { -- if (UnsafeCopyMemory::_table == NULL) { -- UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); -- } -- - 
StubGenerator g(code, all); - } - -From dd6a7c520a5adeef5b6686c161554adcba61113f Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 15:55:09 +0800 -Subject: [PATCH 019/140] Revert JDK-8282085: The REGISTER_DEFINITION macro is - useless after JDK-8269122 - ---- - .../cpu/riscv/register_definitions_riscv.cpp | 192 ++++++++++++++++++ - 1 file changed, 192 insertions(+) - create mode 100644 src/hotspot/cpu/riscv/register_definitions_riscv.cpp - -diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp -new file mode 100644 -index 00000000000..583f67573ca ---- /dev/null -+++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp -@@ -0,0 +1,192 @@ -+/* -+ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "asm/assembler.hpp" -+#include "asm/register.hpp" -+#include "interp_masm_riscv.hpp" -+#include "register_riscv.hpp" -+ -+REGISTER_DEFINITION(Register, noreg); -+ -+REGISTER_DEFINITION(Register, x0); -+REGISTER_DEFINITION(Register, x1); -+REGISTER_DEFINITION(Register, x2); -+REGISTER_DEFINITION(Register, x3); -+REGISTER_DEFINITION(Register, x4); -+REGISTER_DEFINITION(Register, x5); -+REGISTER_DEFINITION(Register, x6); -+REGISTER_DEFINITION(Register, x7); -+REGISTER_DEFINITION(Register, x8); -+REGISTER_DEFINITION(Register, x9); -+REGISTER_DEFINITION(Register, x10); -+REGISTER_DEFINITION(Register, x11); -+REGISTER_DEFINITION(Register, x12); -+REGISTER_DEFINITION(Register, x13); -+REGISTER_DEFINITION(Register, x14); -+REGISTER_DEFINITION(Register, x15); -+REGISTER_DEFINITION(Register, x16); -+REGISTER_DEFINITION(Register, x17); -+REGISTER_DEFINITION(Register, x18); -+REGISTER_DEFINITION(Register, x19); -+REGISTER_DEFINITION(Register, x20); -+REGISTER_DEFINITION(Register, x21); -+REGISTER_DEFINITION(Register, x22); -+REGISTER_DEFINITION(Register, x23); -+REGISTER_DEFINITION(Register, x24); -+REGISTER_DEFINITION(Register, x25); -+REGISTER_DEFINITION(Register, x26); -+REGISTER_DEFINITION(Register, x27); -+REGISTER_DEFINITION(Register, x28); -+REGISTER_DEFINITION(Register, x29); -+REGISTER_DEFINITION(Register, x30); -+REGISTER_DEFINITION(Register, x31); -+ -+REGISTER_DEFINITION(FloatRegister, fnoreg); -+ -+REGISTER_DEFINITION(FloatRegister, f0); -+REGISTER_DEFINITION(FloatRegister, f1); -+REGISTER_DEFINITION(FloatRegister, f2); -+REGISTER_DEFINITION(FloatRegister, f3); -+REGISTER_DEFINITION(FloatRegister, f4); -+REGISTER_DEFINITION(FloatRegister, f5); -+REGISTER_DEFINITION(FloatRegister, f6); -+REGISTER_DEFINITION(FloatRegister, f7); -+REGISTER_DEFINITION(FloatRegister, f8); -+REGISTER_DEFINITION(FloatRegister, f9); -+REGISTER_DEFINITION(FloatRegister, f10); -+REGISTER_DEFINITION(FloatRegister, f11); -+REGISTER_DEFINITION(FloatRegister, f12); -+REGISTER_DEFINITION(FloatRegister, f13); -+REGISTER_DEFINITION(FloatRegister, f14); -+REGISTER_DEFINITION(FloatRegister, f15); -+REGISTER_DEFINITION(FloatRegister, f16); -+REGISTER_DEFINITION(FloatRegister, f17); -+REGISTER_DEFINITION(FloatRegister, f18); -+REGISTER_DEFINITION(FloatRegister, f19); -+REGISTER_DEFINITION(FloatRegister, f20); -+REGISTER_DEFINITION(FloatRegister, f21); -+REGISTER_DEFINITION(FloatRegister, f22); -+REGISTER_DEFINITION(FloatRegister, f23); -+REGISTER_DEFINITION(FloatRegister, f24); -+REGISTER_DEFINITION(FloatRegister, f25); -+REGISTER_DEFINITION(FloatRegister, f26); -+REGISTER_DEFINITION(FloatRegister, f27); -+REGISTER_DEFINITION(FloatRegister, f28); -+REGISTER_DEFINITION(FloatRegister, f29); -+REGISTER_DEFINITION(FloatRegister, f30); -+REGISTER_DEFINITION(FloatRegister, f31); -+ -+REGISTER_DEFINITION(VectorRegister, vnoreg); -+ -+REGISTER_DEFINITION(VectorRegister, v0); -+REGISTER_DEFINITION(VectorRegister, v1); -+REGISTER_DEFINITION(VectorRegister, v2); -+REGISTER_DEFINITION(VectorRegister, v3); -+REGISTER_DEFINITION(VectorRegister, v4); -+REGISTER_DEFINITION(VectorRegister, v5); -+REGISTER_DEFINITION(VectorRegister, v6); -+REGISTER_DEFINITION(VectorRegister, v7); -+REGISTER_DEFINITION(VectorRegister, v8); -+REGISTER_DEFINITION(VectorRegister, v9); -+REGISTER_DEFINITION(VectorRegister, v10); -+REGISTER_DEFINITION(VectorRegister, v11); -+REGISTER_DEFINITION(VectorRegister, v12); -+REGISTER_DEFINITION(VectorRegister, v13); -+REGISTER_DEFINITION(VectorRegister, v14); 
-+REGISTER_DEFINITION(VectorRegister, v15); -+REGISTER_DEFINITION(VectorRegister, v16); -+REGISTER_DEFINITION(VectorRegister, v17); -+REGISTER_DEFINITION(VectorRegister, v18); -+REGISTER_DEFINITION(VectorRegister, v19); -+REGISTER_DEFINITION(VectorRegister, v20); -+REGISTER_DEFINITION(VectorRegister, v21); -+REGISTER_DEFINITION(VectorRegister, v22); -+REGISTER_DEFINITION(VectorRegister, v23); -+REGISTER_DEFINITION(VectorRegister, v24); -+REGISTER_DEFINITION(VectorRegister, v25); -+REGISTER_DEFINITION(VectorRegister, v26); -+REGISTER_DEFINITION(VectorRegister, v27); -+REGISTER_DEFINITION(VectorRegister, v28); -+REGISTER_DEFINITION(VectorRegister, v29); -+REGISTER_DEFINITION(VectorRegister, v30); -+REGISTER_DEFINITION(VectorRegister, v31); -+ -+REGISTER_DEFINITION(Register, c_rarg0); -+REGISTER_DEFINITION(Register, c_rarg1); -+REGISTER_DEFINITION(Register, c_rarg2); -+REGISTER_DEFINITION(Register, c_rarg3); -+REGISTER_DEFINITION(Register, c_rarg4); -+REGISTER_DEFINITION(Register, c_rarg5); -+REGISTER_DEFINITION(Register, c_rarg6); -+REGISTER_DEFINITION(Register, c_rarg7); -+ -+REGISTER_DEFINITION(FloatRegister, c_farg0); -+REGISTER_DEFINITION(FloatRegister, c_farg1); -+REGISTER_DEFINITION(FloatRegister, c_farg2); -+REGISTER_DEFINITION(FloatRegister, c_farg3); -+REGISTER_DEFINITION(FloatRegister, c_farg4); -+REGISTER_DEFINITION(FloatRegister, c_farg5); -+REGISTER_DEFINITION(FloatRegister, c_farg6); -+REGISTER_DEFINITION(FloatRegister, c_farg7); -+ -+REGISTER_DEFINITION(Register, j_rarg0); -+REGISTER_DEFINITION(Register, j_rarg1); -+REGISTER_DEFINITION(Register, j_rarg2); -+REGISTER_DEFINITION(Register, j_rarg3); -+REGISTER_DEFINITION(Register, j_rarg4); -+REGISTER_DEFINITION(Register, j_rarg5); -+REGISTER_DEFINITION(Register, j_rarg6); -+REGISTER_DEFINITION(Register, j_rarg7); -+ -+REGISTER_DEFINITION(FloatRegister, j_farg0); -+REGISTER_DEFINITION(FloatRegister, j_farg1); -+REGISTER_DEFINITION(FloatRegister, j_farg2); -+REGISTER_DEFINITION(FloatRegister, j_farg3); -+REGISTER_DEFINITION(FloatRegister, j_farg4); -+REGISTER_DEFINITION(FloatRegister, j_farg5); -+REGISTER_DEFINITION(FloatRegister, j_farg6); -+REGISTER_DEFINITION(FloatRegister, j_farg7); -+ -+REGISTER_DEFINITION(Register, zr); -+REGISTER_DEFINITION(Register, gp); -+REGISTER_DEFINITION(Register, tp); -+REGISTER_DEFINITION(Register, xmethod); -+REGISTER_DEFINITION(Register, ra); -+REGISTER_DEFINITION(Register, sp); -+REGISTER_DEFINITION(Register, fp); -+REGISTER_DEFINITION(Register, xheapbase); -+REGISTER_DEFINITION(Register, xcpool); -+REGISTER_DEFINITION(Register, xmonitors); -+REGISTER_DEFINITION(Register, xlocals); -+REGISTER_DEFINITION(Register, xthread); -+REGISTER_DEFINITION(Register, xbcp); -+REGISTER_DEFINITION(Register, xdispatch); -+REGISTER_DEFINITION(Register, esp); -+ -+REGISTER_DEFINITION(Register, t0); -+REGISTER_DEFINITION(Register, t1); -+REGISTER_DEFINITION(Register, t2); - -From 561261b051d88ddb0053733f03cbefc75dedcea8 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 16:41:03 +0800 -Subject: [PATCH 020/140] Revert JDK-7175279: Don't use x87 FPU on x86-64 - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 0e383a3c139..977563fe5f4 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -2019,6 +2019,18 @@ address LIR_Assembler::int_constant(jlong n) { - } - 
} - -+void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } -+ -+void LIR_Assembler::reset_FPU() { Unimplemented(); } -+ -+void LIR_Assembler::fpop() { Unimplemented(); } -+ -+void LIR_Assembler::fxch(int i) { Unimplemented(); } -+ -+void LIR_Assembler::fld(int i) { Unimplemented(); } -+ -+void LIR_Assembler::ffree(int i) { Unimplemented(); } -+ - void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { - __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, - Assembler::rl /* release */, t0, true /* result as bool */); - -From ff4e1443fd000208714b506d52c0fab1c91e4ac8 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 16:41:15 +0800 -Subject: [PATCH 021/140] Revert JDK-8255909: Remove unused delayed_value - methods - ---- - src/hotspot/cpu/riscv/assembler_riscv.hpp | 7 +++++++ - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 16 ++++++++++++++++ - src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 4 ++++ - 3 files changed, 27 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp -index 4923962a496..44e8d4b4ff1 100644 ---- a/src/hotspot/cpu/riscv/assembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -3027,6 +3027,13 @@ enum Nf { - Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(false) { - } - -+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, -+ Register tmp, -+ int offset) { -+ ShouldNotCallThis(); -+ return RegisterOrConstant(); -+ } -+ - // Stack overflow checking - virtual void bang_stack_with_offset(int offset) { Unimplemented(); } - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 269d76ba69e..878957cbede 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -191,6 +191,22 @@ void MacroAssembler::call_VM(Register oop_result, - void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} - void MacroAssembler::check_and_handle_popframe(Register java_thread) {} - -+RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, -+ Register tmp, -+ int offset) { -+ intptr_t value = *delayed_value_addr; -+ if (value != 0) -+ return RegisterOrConstant(value + offset); -+ -+ // load indirectly to solve generation ordering problem -+ ld(tmp, ExternalAddress((address) delayed_value_addr)); -+ -+ if (offset != 0) -+ add(tmp, tmp, offset); -+ -+ return RegisterOrConstant(tmp); -+} -+ - // Calls to C land - // - // When entering C land, the fp, & esp of the last Java frame have to be recorded -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index b59bdadb8bf..f23f7e7d1e6 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -625,6 +625,10 @@ class MacroAssembler: public Assembler { - - void reserved_stack_check(); - -+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, -+ Register tmp, -+ int offset); -+ - void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); - void read_polling_page(Register r, address page, relocInfo::relocType rtype); - void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); - -From afe35a3fdc705645bfe2a2e797a95ce1d5203872 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 16:51:39 
+0800 -Subject: [PATCH 022/140] Revert JDK-8263679: C1: Remove vtable call - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 977563fe5f4..a0ecc63d851 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -1382,6 +1382,11 @@ void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { - add_call_info(code_offset(), op->info()); - } - -+/* Currently, vtable-dispatch is only enabled for sparc platforms */ -+void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { -+ ShouldNotReachHere(); -+} -+ - void LIR_Assembler::emit_static_call_stub() { - address call_pc = __ pc(); - assert((__ offset() % 4) == 0, "bad alignment"); - -From 655b34c00ec5ff6fa7e82de96a78a0c58ba91985 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 16:55:57 +0800 -Subject: [PATCH 023/140] Revert JDK-8264063: Outer Safepoint poll load should - not reference the head of inner strip mined loop. - ---- - src/hotspot/cpu/riscv/riscv.ad | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 8c7a8ede815..fcddf752564 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -952,6 +952,20 @@ int CallDynamicJavaDirectNode::compute_padding(int current_offset) const - return align_up(current_offset, alignment_required()) - current_offset; - } - -+// Indicate if the safepoint node needs the polling page as an input -+ -+// the shared code plants the oop data at the start of the generated -+// code for the safepoint node and that needs ot be at the load -+// instruction itself. so we cannot plant a mov of the safepoint poll -+// address followed by a load. setting this to true means the mov is -+// scheduled as a prior instruction. that's better for scheduling -+// anyway. -+ -+bool SafePointNode::needs_polling_address_input() -+{ -+ return true; -+} -+ - //============================================================================= - - #ifndef PRODUCT - -From 4a6f7dafdb4e0cf054b7867de60f789d4ca1d9f3 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:26:29 +0800 -Subject: [PATCH 024/140] Revert: JDK-8266810: Move trivial Matcher code to - cpu-specific header files JDK-8254966: Remove unused code from Matcher - ---- - src/hotspot/cpu/riscv/matcher_riscv.hpp | 129 ------------------------ - src/hotspot/cpu/riscv/riscv.ad | 108 +++++++++++++++++++- - 2 files changed, 107 insertions(+), 130 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/matcher_riscv.hpp - -diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp -deleted file mode 100644 -index 4c7fabd7240..00000000000 ---- a/src/hotspot/cpu/riscv/matcher_riscv.hpp -+++ /dev/null -@@ -1,129 +0,0 @@ --/* -- * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. 
-- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#ifndef CPU_RISCV_MATCHER_RISCV_HPP --#define CPU_RISCV_MATCHER_RISCV_HPP -- -- // Defined within class Matcher -- -- // false => size gets scaled to BytesPerLong, ok. -- static const bool init_array_count_is_in_bytes = false; -- -- // riscv doesn't support misaligned vectors store/load on JDK11. -- static constexpr bool misaligned_vectors_ok() { -- return false; -- } -- -- // Whether code generation need accurate ConvI2L types. -- static const bool convi2l_type_required = false; -- -- // Does the CPU require late expand (see block.cpp for description of late expand)? -- static const bool require_postalloc_expand = false; -- -- // Do we need to mask the count passed to shift instructions or does -- // the cpu only look at the lower 5/6 bits anyway? -- static const bool need_masked_shift_count = false; -- -- static constexpr bool isSimpleConstant64(jlong value) { -- // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. -- // Probably always true, even if a temp register is required. -- return true; -- } -- -- // Use conditional move (CMOVL) -- static constexpr int long_cmove_cost() { -- // long cmoves are no more expensive than int cmoves -- return 0; -- } -- -- static constexpr int float_cmove_cost() { -- // float cmoves are no more expensive than int cmoves -- return 0; -- } -- -- // This affects two different things: -- // - how Decode nodes are matched -- // - how ImplicitNullCheck opportunities are recognized -- // If true, the matcher will try to remove all Decodes and match them -- // (as operands) into nodes. NullChecks are not prepared to deal with -- // Decodes by final_graph_reshaping(). -- // If false, final_graph_reshaping() forces the decode behind the Cmp -- // for a NullCheck. The matcher matches the Decode node into a register. -- // Implicit_null_check optimization moves the Decode along with the -- // memory operation back up before the NullCheck. -- static bool narrow_oop_use_complex_address() { -- return CompressedOops::shift() == 0; -- } -- -- static bool narrow_klass_use_complex_address() { -- return false; -- } -- -- static bool const_oop_prefer_decode() { -- // Prefer ConN+DecodeN over ConP in simple compressed oops mode. -- return CompressedOops::base() == NULL; -- } -- -- static bool const_klass_prefer_decode() { -- // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. -- return CompressedKlassPointers::base() == NULL; -- } -- -- // Is it better to copy float constants, or load them directly from -- // memory? Intel can load a float constant from a direct address, -- // requiring no extra registers. Most RISCs will have to materialize -- // an address into a register first, so they would do better to copy -- // the constant from stack. 
-- static const bool rematerialize_float_constants = false; -- -- // If CPU can load and store mis-aligned doubles directly then no -- // fixup is needed. Else we split the double into 2 integer pieces -- // and move it piece-by-piece. Only happens when passing doubles into -- // C code as the Java calling convention forces doubles to be aligned. -- static const bool misaligned_doubles_ok = true; -- -- // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. -- static const bool strict_fp_requires_explicit_rounding = false; -- -- // Are floats converted to double when stored to stack during -- // deoptimization? -- static constexpr bool float_in_double() { return false; } -- -- // Do ints take an entire long register or just half? -- // The relevant question is how the int is callee-saved: -- // the whole long is written but de-opt'ing will have to extract -- // the relevant 32 bits. -- static const bool int_in_long = true; -- -- // true means we have fast l2f convers -- // false means that conversion is done by runtime call -- static constexpr bool convL2FSupported(void) { -- return true; -- } -- -- // Implements a variant of EncodeISOArrayNode that encode ASCII only -- static const bool supports_encode_ascii_array = false; -- --#endif // CPU_RISCV_MATCHER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index fcddf752564..a9e5f2e6841 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -330,7 +330,9 @@ alloc_class chunk2(RFLAGS); - // Several register classes are automatically defined based upon information in - // this architecture description. - // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) --// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) -+// 2) reg_class compiler_method_reg ( /* as def'd in frame section */ ) -+// 2) reg_class interpreter_method_reg ( /* as def'd in frame section */ ) -+// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) - // - - // Class for all 32 bit general purpose registers -@@ -1548,6 +1550,17 @@ bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { - return (-4096 <= offs && offs < 4096); - } - -+const bool Matcher::isSimpleConstant64(jlong value) { -+ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. -+ // Probably always true, even if a temp register is required. -+ return true; -+} -+ -+// true just means we have fast l2f conversion -+const bool Matcher::convL2FSupported(void) { -+ return true; -+} -+ - // Vector width in bytes. - const int Matcher::vector_width_in_bytes(BasicType bt) { - return 0; -@@ -1567,6 +1580,94 @@ const uint Matcher::vector_ideal_reg(int len) { - return 0; - } - -+// RISC-V supports misaligned vectors store/load. -+const bool Matcher::misaligned_vectors_ok() { -+ return true; -+} -+ -+// false => size gets scaled to BytesPerLong, ok. -+const bool Matcher::init_array_count_is_in_bytes = false; -+ -+// Use conditional move (CMOVL) -+const int Matcher::long_cmove_cost() { -+ // long cmoves are no more expensive than int cmoves -+ return 0; -+} -+ -+const int Matcher::float_cmove_cost() { -+ // float cmoves are no more expensive than int cmoves -+ return 0; -+} -+ -+// Does the CPU require late expand (see block.cpp for description of late expand)? 
-+const bool Matcher::require_postalloc_expand = false; -+ -+// Do we need to mask the count passed to shift instructions or does -+// the cpu only look at the lower 5/6 bits anyway? -+const bool Matcher::need_masked_shift_count = false; -+ -+// This affects two different things: -+// - how Decode nodes are matched -+// - how ImplicitNullCheck opportunities are recognized -+// If true, the matcher will try to remove all Decodes and match them -+// (as operands) into nodes. NullChecks are not prepared to deal with -+// Decodes by final_graph_reshaping(). -+// If false, final_graph_reshaping() forces the decode behind the Cmp -+// for a NullCheck. The matcher matches the Decode node into a register. -+// Implicit_null_check optimization moves the Decode along with the -+// memory operation back up before the NullCheck. -+bool Matcher::narrow_oop_use_complex_address() { -+ return Universe::narrow_oop_shift() == 0; -+} -+ -+bool Matcher::narrow_klass_use_complex_address() { -+// TODO -+// decide whether we need to set this to true -+ return false; -+} -+ -+bool Matcher::const_oop_prefer_decode() { -+ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. -+ return Universe::narrow_oop_base() == NULL; -+} -+ -+bool Matcher::const_klass_prefer_decode() { -+ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. -+ return Universe::narrow_klass_base() == NULL; -+} -+ -+// Is it better to copy float constants, or load them directly from -+// memory? Intel can load a float constant from a direct address, -+// requiring no extra registers. Most RISCs will have to materialize -+// an address into a register first, so they would do better to copy -+// the constant from stack. -+const bool Matcher::rematerialize_float_constants = false; -+ -+// If CPU can load and store mis-aligned doubles directly then no -+// fixup is needed. Else we split the double into 2 integer pieces -+// and move it piece-by-piece. Only happens when passing doubles into -+// C code as the Java calling convention forces doubles to be aligned. -+const bool Matcher::misaligned_doubles_ok = true; -+ -+// No-op on amd64 -+void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { -+ Unimplemented(); -+} -+ -+// Advertise here if the CPU requires explicit rounding operations to -+// implement the UseStrictFP mode. -+const bool Matcher::strict_fp_requires_explicit_rounding = false; -+ -+// Are floats converted to double when stored to stack during -+// deoptimization? -+bool Matcher::float_in_double() { return false; } -+ -+// Do ints take an entire long register or just half? -+// The relevant question is how the int is callee-saved: -+// the whole long is written but de-opt'ing will have to extract -+// the relevant 32 bits. -+const bool Matcher::int_in_long = true; -+ - // Return whether or not this register is ever used as an argument. - // This function is used on startup to build the trampoline stubs in - // generateOptoStub. Registers not mentioned will be killed by the VM -@@ -1671,6 +1772,8 @@ bool size_fits_all_mem_uses(AddPNode* addp, int shift) { - return true; - } - -+const bool Matcher::convi2l_type_required = false; -+ - // Should the Matcher clone input 'm' of node 'n'? - bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { - assert_cond(m != NULL); -@@ -2250,6 +2353,9 @@ frame %{ - // Inline Cache Register or methodOop for I2C. - inline_cache_reg(R31); - -+ // Method Oop Register when calling interpreter. 
-+ interpreter_method_oop_reg(R31); -+ - // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] - cisc_spilling_operand_name(indOffset); - - -From 4b0f20882cd9b5e5da92d61c2fa02e0cbea0ef0c Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:30:42 +0800 -Subject: [PATCH 025/140] Revert JDK-8256238: Remove - Matcher::pass_original_key_for_aes - ---- - src/hotspot/cpu/riscv/riscv.ad | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index a9e5f2e6841..0d1afd5584a 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1580,6 +1580,11 @@ const uint Matcher::vector_ideal_reg(int len) { - return 0; - } - -+// AES support not yet implemented -+const bool Matcher::pass_original_key_for_aes() { -+ return false; -+} -+ - // RISC-V supports misaligned vectors store/load. - const bool Matcher::misaligned_vectors_ok() { - return true; - -From 36d7ecedbcd95911d1b355bbab3e8fdf81b36e7d Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:42:37 +0800 -Subject: [PATCH 026/140] Revert JDK-8242492: C2: Remove - Matcher::vector_shift_count_ideal_reg() - ---- - src/hotspot/cpu/riscv/riscv.ad | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 0d1afd5584a..c10e91633a5 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1580,6 +1580,11 @@ const uint Matcher::vector_ideal_reg(int len) { - return 0; - } - -+const uint Matcher::vector_shift_count_ideal_reg(int size) { -+ fatal("vector shift is not supported"); -+ return Node::NotAMachineReg; -+} -+ - // AES support not yet implemented - const bool Matcher::pass_original_key_for_aes() { - return false; - -From b78e448a460fcdc66553e66342e93e5ac87c0c61 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:47:13 +0800 -Subject: [PATCH 027/140] Revert JDK-8266937: Remove Compile::reshape_address - ---- - src/hotspot/cpu/riscv/riscv.ad | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index c10e91633a5..2c5ec0451b8 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1801,6 +1801,9 @@ bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, - return clone_base_plus_offset_address(m, mstack, address_visited); - } - -+void Compile::reshape_address(AddPNode* addp) { -+} -+ - %} - - - -From cd34a5ce5d120cdac939217976d1e7b7e98bf654 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:49:09 +0800 -Subject: [PATCH 028/140] Revert JDK-8272771: frame::pd_ps() is not implemented - on any platform - ---- - src/hotspot/cpu/riscv/frame_riscv.cpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 8e7babe2c61..8e4f20fe561 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -683,6 +683,7 @@ frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { - init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); - } - -+void frame::pd_ps() {} - #endif - - void JavaFrameAnchor::make_walkable(JavaThread* thread) { - -From bdb16daf6d809d0c38256be99ecbe922d24b889b Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:56:27 +0800 -Subject: [PATCH 029/140] Revert JDK-8268858: Determine register pressure - 
automatically by the number of available registers for allocation - ---- - src/hotspot/cpu/riscv/riscv.ad | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 2c5ec0451b8..a6aa52de29e 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1527,6 +1527,10 @@ const bool Matcher::has_predicated_vectors(void) { - return false; - } - -+const int Matcher::float_pressure(int default_pressure_threshold) { -+ return default_pressure_threshold; -+} -+ - // Is this branch offset short enough that a short branch can be used? - // - // NOTE: If the platform does not provide any short branch variants, then - -From bbaa7a97b5d8110ead9dc44f31e2c5fe3bcd83d5 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:58:16 +0800 -Subject: [PATCH 030/140] Revert JDK-8253040: Remove unused - Matcher::regnum_to_fpu_offset() - ---- - src/hotspot/cpu/riscv/riscv.ad | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index a6aa52de29e..2d847cb6454 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1531,6 +1531,12 @@ const int Matcher::float_pressure(int default_pressure_threshold) { - return default_pressure_threshold; - } - -+int Matcher::regnum_to_fpu_offset(int regnum) -+{ -+ Unimplemented(); -+ return 0; -+} -+ - // Is this branch offset short enough that a short branch can be used? - // - // NOTE: If the platform does not provide any short branch variants, then - -From ce9ad0af72e405153534369bff1b1725697f3e40 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 18:03:23 +0800 -Subject: [PATCH 031/140] Revert JDK-8254084: Remove - TemplateTable::pd_initialize - ---- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index 4e388ac4eaa..c9d399ccdaf 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -48,6 +48,12 @@ - - #define __ _masm-> - -+// Platform-dependent initialization -+ -+void TemplateTable::pd_initialize() { -+ // No RISC-V specific initialization -+} -+ - // Address computation: local variables - - static inline Address iaddress(int n) { - -From 49429187846e6f2b00ab2853e27097eae274a947 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 20:17:07 +0800 -Subject: [PATCH 032/140] Revert JDK-8224815: 8224815: Remove non-GC uses of - CollectedHeap::is_in_reserved() - ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 878957cbede..cf01d7d74bb 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1632,7 +1632,7 @@ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { - #ifdef ASSERT - { - ThreadInVMfromUnknown tiv; -- assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); -+ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); - } - #endif - oop_index = oop_recorder()->find_index(obj); -@@ -2800,7 +2800,7 @@ void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { - assert (UseCompressedOops, "should 
only be used for compressed oops"); - assert (Universe::heap() != NULL, "java heap should be initialized"); - assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); -- assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); -+ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); - } - #endif - int oop_index = oop_recorder()->find_index(obj); -@@ -2815,7 +2815,7 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { - assert (UseCompressedClassPointers, "should only be used for compressed headers"); - assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); - int index = oop_recorder()->find_index(k); -- assert(!Universe::heap()->is_in(k), "should not be an oop"); -+ assert(!Universe::heap()->is_in_reserved(k), "should not be an oop"); - - InstructionMark im(this); - RelocationHolder rspec = metadata_Relocation::spec(index); - -From a71fabb1ff05db9955557a888be6cd1b5f87deea Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 21:14:30 +0800 -Subject: [PATCH 033/140] Revert JDK-8253540: InterpreterRuntime::monitorexit - should be a JRT_LEAF function - ---- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 8adc7b1320d..48957803fdc 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -839,7 +839,9 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) - assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); - - if (UseHeavyMonitors) { -- call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), -+ lock_reg); - } else { - Label done; - -@@ -871,7 +873,9 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) - - // Call the runtime routine for slow case. 
- sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj -- call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), -+ lock_reg); - - bind(done); - - -From a0b18eea3c83ef8f1de2c1b3cd55452f0f6b9af2 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Fri, 7 Apr 2023 12:51:33 +0800 -Subject: [PATCH 034/140] Revert JDK-8278387: riscv: Implement UseHeavyMonitors - consistently && JDK-8279826: riscv: Preserve result in native wrapper with - +UseHeavyMonitors - ---- - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 8 +- - src/hotspot/cpu/riscv/riscv.ad | 92 +++++++++---------- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 85 ++++++++--------- - 3 files changed, 80 insertions(+), 105 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index a0ecc63d851..dd657963438 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -367,11 +367,7 @@ int LIR_Assembler::emit_unwind_handler() { - if (method()->is_synchronized()) { - monitor_address(0, FrameMap::r10_opr); - stub = new MonitorExitStub(FrameMap::r10_opr, true, 0); -- if (UseHeavyMonitors) { -- __ j(*stub->entry()); -- } else { -- __ unlock_object(x15, x14, x10, *stub->entry()); -- } -+ __ unlock_object(x15, x14, x10, *stub->entry()); - __ bind(*stub->continuation()); - } - -@@ -1512,7 +1508,7 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { - Register obj = op->obj_opr()->as_register(); // may not be an oop - Register hdr = op->hdr_opr()->as_register(); - Register lock = op->lock_opr()->as_register(); -- if (UseHeavyMonitors) { -+ if (!UseFastLocking) { - __ j(*op->stub()->entry()); - } else if (op->code() == lir_lock) { - assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 2d847cb6454..29027d594a0 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2109,40 +2109,36 @@ encode %{ - __ andi(t0, disp_hdr, markOopDesc::monitor_value); - __ bnez(t0, object_has_monitor); - -- if (!UseHeavyMonitors) { -- // Set tmp to be (markWord of object | UNLOCK_VALUE). -- __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); -- -- // Initialize the box. (Must happen before we update the object mark!) -- __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -- -- // Compare object markWord with an unlocked value (tmp) and if -- // equal exchange the stack address of our box with object markWord. -- // On failure disp_hdr contains the possibly locked markWord. -- __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, -- Assembler::rl, /*result*/disp_hdr); -- __ mv(flag, zr); -- __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas -- -- assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -- -- // If the compare-and-exchange succeeded, then we found an unlocked -- // object, will have now locked it will continue at label cont -- // We did not see an unlocked object so try the fast recursive case. -- -- // Check if the owner is self by comparing the value in the -- // markWord of object (disp_hdr) with the stack pointer. 
-- __ sub(disp_hdr, disp_hdr, sp); -- __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); -- // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, -- // hence we can store 0 as the displaced header in the box, which indicates that it is a -- // recursive lock. -- __ andr(tmp/*==0?*/, disp_hdr, tmp); -- __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); -- __ mv(flag, tmp); // we can use the value of tmp as the result here -- } else { -- __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path -- } -+ // Set tmp to be (markWord of object | UNLOCK_VALUE). -+ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); -+ -+ // Initialize the box. (Must happen before we update the object mark!) -+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ -+ // Compare object markWord with an unlocked value (tmp) and if -+ // equal exchange the stack address of our box with object markWord. -+ // On failure disp_hdr contains the possibly locked markWord. -+ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/disp_hdr); -+ __ mv(flag, zr); -+ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas -+ -+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -+ -+ // If the compare-and-exchange succeeded, then we found an unlocked -+ // object, will have now locked it will continue at label cont -+ // We did not see an unlocked object so try the fast recursive case. -+ -+ // Check if the owner is self by comparing the value in the -+ // markWord of object (disp_hdr) with the stack pointer. -+ __ sub(disp_hdr, disp_hdr, sp); -+ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); -+ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, -+ // hence we can store 0 as the displaced header in the box, which indicates that it is a -+ // recursive lock. -+ __ andr(tmp/*==0?*/, disp_hdr, tmp); -+ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ __ mv(flag, tmp); // we can use the value of tmp as the result here - - __ j(cont); - -@@ -2189,31 +2185,25 @@ encode %{ - - assert_different_registers(oop, box, tmp, disp_hdr, flag); - -- if (!UseHeavyMonitors) { -- // Find the lock address and load the displaced header from the stack. -- __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ // Find the lock address and load the displaced header from the stack. -+ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); - -- // If the displaced header is 0, we have a recursive unlock. -- __ mv(flag, disp_hdr); -- __ beqz(disp_hdr, cont); -- } -+ // If the displaced header is 0, we have a recursive unlock. -+ __ mv(flag, disp_hdr); -+ __ beqz(disp_hdr, cont); - - // Handle existing monitor. - __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); - __ andi(t0, disp_hdr, markOopDesc::monitor_value); - __ bnez(t0, object_has_monitor); - -- if (!UseHeavyMonitors) { -- // Check if it is still a light weight lock, this is true if we -- // see the stack address of the basicLock in the markWord of the -- // object. -+ // Check if it is still a light weight lock, this is true if we -+ // see the stack address of the basicLock in the markWord of the -+ // object. 
- -- __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, -- Assembler::rl, /*result*/tmp); -- __ xorr(flag, box, tmp); // box == tmp if cas succeeds -- } else { -- __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path -- } -+ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, -+ Assembler::rl, /*result*/tmp); -+ __ xorr(flag, box, tmp); // box == tmp if cas succeeds - __ j(cont); - - assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 21aa3b58c09..5203200b068 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1488,39 +1488,35 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - // Load the oop from the handle - __ ld(obj_reg, Address(oop_handle_reg, 0)); - -- if (!UseHeavyMonitors) { -- // Load (object->mark() | 1) into swap_reg % x10 -- __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -- __ ori(swap_reg, t0, 1); -- -- // Save (object->mark() | 1) into BasicLock's displaced header -- __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -- -- // src -> dest if dest == x10 else x10 <- dest -- { -- Label here; -- __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); -- } -+ // Load (object->mark() | 1) into swap_reg % x10 -+ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ __ ori(swap_reg, t0, 1); - -- // Test if the oopMark is an obvious stack pointer, i.e., -- // 1) (mark & 3) == 0, and -- // 2) sp <= mark < mark + os::pagesize() -- // These 3 tests can be done by evaluating the following -- // expression: ((mark - sp) & (3 - os::vm_page_size())), -- // assuming both stack pointer and pagesize have their -- // least significant 2 bits clear. -- // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg -- -- __ sub(swap_reg, swap_reg, sp); -- __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); -- -- // Save the test result, for recursive case, the result is zero -- __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -- __ bnez(swap_reg, slow_path_lock); -- } else { -- __ j(slow_path_lock); -+ // Save (object->mark() | 1) into BasicLock's displaced header -+ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -+ -+ // src -> dest if dest == x10 else x10 <- dest -+ { -+ Label here; -+ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); - } - -+ // Test if the oopMark is an obvious stack pointer, i.e., -+ // 1) (mark & 3) == 0, and -+ // 2) sp <= mark < mark + os::pagesize() -+ // These 3 tests can be done by evaluating the following -+ // expression: ((mark - sp) & (3 - os::vm_page_size())), -+ // assuming both stack pointer and pagesize have their -+ // least significant 2 bits clear. 
-+ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg -+ -+ __ sub(swap_reg, swap_reg, sp); -+ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); -+ -+ // Save the test result, for recursive case, the result is zero -+ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -+ __ bnez(swap_reg, slow_path_lock); -+ - // Slow path will re-enter here - __ bind(lock_done); - } -@@ -1608,31 +1604,24 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - - Label done; - -- if (!UseHeavyMonitors) { -- // Simple recursive lock? -- __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -- __ beqz(t0, done); -- } -- -+ // Simple recursive lock? -+ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ __ beqz(t0, done); - - // Must save x10 if if it is live now because cmpxchg must use it - if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { - save_native_result(masm, ret_type, stack_slots); - } - -- if (!UseHeavyMonitors) { -- // get address of the stack lock -- __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -- // get old displaced header -- __ ld(old_hdr, Address(x10, 0)); -+ // get address of the stack lock -+ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ // get old displaced header -+ __ ld(old_hdr, Address(x10, 0)); - -- // Atomic swap old header if oop still contains the stack lock -- Label succeed; -- __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); -- __ bind(succeed); -- } else { -- __ j(slow_path_unlock); -- } -+ // Atomic swap old header if oop still contains the stack lock -+ Label succeed; -+ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); -+ __ bind(succeed); - - // slow path re-enters here - __ bind(unlock_done); - -From 1e844b8019cb3516c0843826de2bd3fcd2222f41 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 16:49:19 +0800 -Subject: [PATCH 035/140] Revert JDK-8258192: Obsolete the CriticalJNINatives - flag. CriticalJNINatives is unimplemented() even on AArch64. See - https://bugs.openjdk.org/browse/JDK-8254694. 
- -Also following up 8191129: AARCH64: Invalid value passed to critical JNI function ---- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 3 ++- - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 2 ++ - .../criticalnatives/argumentcorruption/CheckLongArgs.java | 2 +- - .../jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java | 2 +- - 4 files changed, 6 insertions(+), 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 5203200b068..f8585afbdc2 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1111,7 +1111,8 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - int compile_id, - BasicType* in_sig_bt, - VMRegPair* in_regs, -- BasicType ret_type) { -+ BasicType ret_type, -+ address critical_entry) { - if (method->is_method_handle_intrinsic()) { - vmIntrinsics::ID iid = method->intrinsic_id(); - intptr_t start = (intptr_t)__ pc(); -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -index dd65f32277f..c0491d23fa6 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -147,6 +147,8 @@ void VM_Version::initialize() { - #ifdef COMPILER2 - c2_initialize(); - #endif // COMPILER2 -+ -+ UNSUPPORTED_OPTION(CriticalJNINatives); - } - - #ifdef COMPILER2 -diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -index acb86812d25..2c866f26f08 100644 ---- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -+++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -@@ -24,7 +24,7 @@ - - /* @test - * @bug 8167409 -- * @requires (os.arch != "aarch64") & (os.arch != "arm") -+ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") - * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs - */ - package compiler.runtime.criticalnatives.argumentcorruption; -diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -index eab36f93113..1da369fde23 100644 ---- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -+++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -@@ -24,7 +24,7 @@ - - /* @test - * @bug 8167408 -- * @requires (os.arch != "aarch64") & (os.arch != "arm") -+ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") - * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp - */ - package compiler.runtime.criticalnatives.lookup; - -From 58ad930e78501c6fad024e7ef05066ec19eb6219 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 11 Apr 2023 11:45:04 +0800 -Subject: [PATCH 036/140] 8202976: Add C1 lea patching support for x86 (RISC-V - part) - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index dd657963438..46a20a64194 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -1818,6 
+1818,7 @@ void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, C - return; - } - -+ assert(patch_code == lir_patch_none, "Patch code not supported"); - LIR_Address* adr = addr->as_address_ptr(); - Register dst = dest->as_register_lo(); - - -From 2074b8ec0ea3562f3999b4f4010b3f5b57dbe502 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 11 Apr 2023 12:15:44 +0800 -Subject: [PATCH 037/140] Revert 8232365: Implementation for JEP 363: Remove - the Concurrent Mark Sweep (CMS) Garbage Collector - ---- - src/hotspot/cpu/riscv/globals_riscv.hpp | 3 +++ - src/hotspot/cpu/riscv/riscv.ad | 27 +++++++++++++++++++++++++ - 2 files changed, 30 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index 845064d6cbc..50bbb6a77b8 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -63,6 +63,9 @@ define_pd_global(bool, RewriteFrequentPairs, true); - - define_pd_global(bool, PreserveFramePointer, false); - -+// GC Ergo Flags -+define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread -+ - define_pd_global(uintx, TypeProfileLevel, 111); - - define_pd_global(bool, CompactStrings, true); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 29027d594a0..386ef731696 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -752,6 +752,9 @@ bool is_CAS(int opcode, bool maybe_volatile); - // predicate controlling translation of CompareAndSwapX - bool needs_acquiring_load_reserved(const Node *load); - -+// predicate controlling translation of StoreCM -+bool unnecessary_storestore(const Node *storecm); -+ - // predicate controlling addressing modes - bool size_fits_all_mem_uses(AddPNode* addp, int shift); - %} -@@ -874,6 +877,29 @@ bool needs_acquiring_load_reserved(const Node *n) - // so we can just return true here - return true; - } -+ -+// predicate controlling translation of StoreCM -+// -+// returns true if a StoreStore must precede the card write otherwise -+// false -+ -+bool unnecessary_storestore(const Node *storecm) -+{ -+ assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); -+ -+ // we need to generate a dmb ishst between an object put and the -+ // associated card mark when we are using CMS without conditional -+ // card marking -+ -+ if (UseConcMarkSweepGC && !UseCondCardMark) { -+ return false; -+ } -+ -+ // a storestore is unnecesary in all other cases -+ -+ return true; -+} -+ - #define __ _masm. 
- - // advance declarations for helper functions to convert register -@@ -4566,6 +4592,7 @@ instruct loadConD0(fRegD dst, immD0 con) %{ - instruct storeimmCM0(immI0 zero, memory mem) - %{ - match(Set mem (StoreCM mem zero)); -+ predicate(unnecessary_storestore(n)); - - ins_cost(STORE_COST); - format %{ "storestore (elided)\n\t" - -From f838cf41b48c6bc17d052531ab5594de236b1302 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 11 Apr 2023 22:06:58 +0800 -Subject: [PATCH 038/140] Revert 8220051: Remove global safepoint code - ---- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 3 +- - .../cpu/riscv/macroAssembler_riscv.cpp | 26 ++++++++++- - .../cpu/riscv/macroAssembler_riscv.hpp | 3 +- - src/hotspot/cpu/riscv/riscv.ad | 43 +++++++++++++++++++ - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 4 +- - .../templateInterpreterGenerator_riscv.cpp | 2 +- - 6 files changed, 75 insertions(+), 6 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 48957803fdc..74dded77d19 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -515,7 +515,8 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, - - Label safepoint; - address* const safepoint_table = Interpreter::safept_table(state); -- bool needs_thread_local_poll = generate_poll && table != safepoint_table; -+ bool needs_thread_local_poll = generate_poll && -+ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; - - if (needs_thread_local_poll) { - NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index cf01d7d74bb..73629e3dba3 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -264,6 +264,30 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp, - } - } - -+// Just like safepoint_poll, but use an acquiring load for thread- -+// local polling. -+// -+// We need an acquire here to ensure that any subsequent load of the -+// global SafepointSynchronize::_state flag is ordered after this load -+// of the local Thread::_polling page. We don't want this poll to -+// return false (i.e. not safepointing) and a later poll of the global -+// SafepointSynchronize::_state spuriously to return true. -+// -+// This is to avoid a race when we're in a native->Java transition -+// racing the code which wakes up from a safepoint. 
-+// -+void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ membar(MacroAssembler::AnyAny); -+ ld(t1, Address(xthread, Thread::polling_page_offset())); -+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ andi(t0, t1, SafepointMechanism::poll_bit()); -+ bnez(t0, slow_path); -+ } else { -+ safepoint_poll(slow_path); -+ } -+} -+ - void MacroAssembler::reset_last_Java_frame(bool clear_fp) { - // we must set sp to zero to clear frame - sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); -@@ -2137,7 +2161,7 @@ void MacroAssembler::check_klass_subtype(Register sub_klass, - bind(L_failure); - } - --void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { -+void MacroAssembler::safepoint_poll(Label& slow_path) { - if (SafepointMechanism::uses_thread_local_poll()) { - ld(t1, Address(xthread, Thread::polling_page_offset())); - andi(t0, t1, SafepointMechanism::poll_bit()); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index f23f7e7d1e6..8a2c6e07d88 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -44,7 +44,8 @@ class MacroAssembler: public Assembler { - } - virtual ~MacroAssembler() {} - -- void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod); -+ void safepoint_poll(Label& slow_path); -+ void safepoint_poll_acquire(Label& slow_path); - - // Place a fence.i after code may have been modified due to a safepoint. - void safepoint_ifence(); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 386ef731696..2dde4453dac 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1213,6 +1213,14 @@ const Pipeline * MachEpilogNode::pipeline() const { - return MachNode::pipeline_class(); - } - -+// This method seems to be obsolete. It is declared in machnode.hpp -+// and defined in all *.ad files, but it is never called. Should we -+// get rid of it? 
-+int MachEpilogNode::safepoint_offset() const { -+ assert(do_polling(), "no return for this epilog node"); -+ return 4; -+} -+ - //============================================================================= - - // Figure out which register class each belongs in: rc_int, rc_float or -@@ -1907,6 +1915,17 @@ encode %{ - __ li(dst_reg, 1); - %} - -+ enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{ -+ MacroAssembler _masm(&cbuf); -+ int32_t offset = 0; -+ address page = (address)$src$$constant; -+ unsigned long align = (unsigned long)page & 0xfff; -+ assert(align == 0, "polling page must be page aligned"); -+ Register dst_reg = as_Register($dst$$reg); -+ __ la_patchable(dst_reg, Address(page, relocInfo::poll_type), offset); -+ __ addi(dst_reg, dst_reg, offset); -+ %} -+ - enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ - C2_MacroAssembler _masm(&cbuf); - __ load_byte_map_base($dst$$Register); -@@ -2688,6 +2707,17 @@ operand immP_1() - interface(CONST_INTER); - %} - -+// Polling Page Pointer Immediate -+operand immPollPage() -+%{ -+ predicate((address)n->get_ptr() == os::get_polling_page()); -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ - // Card Table Byte Map Base - operand immByteMapBase() - %{ -@@ -4476,6 +4506,19 @@ instruct loadConP1(iRegPNoSp dst, immP_1 con) - ins_pipe(ialu_imm); - %} - -+// Load Poll Page Constant -+instruct loadConPollPage(iRegPNoSp dst, immPollPage con) -+%{ -+ match(Set dst con); -+ -+ ins_cost(ALU_COST * 6); -+ format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} -+ -+ ins_encode(riscv_enc_mov_poll_page(dst, con)); -+ -+ ins_pipe(ialu_imm); -+%} -+ - // Load Byte Map Base Constant - instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) - %{ -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index f8585afbdc2..c501c8f7bac 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1573,7 +1573,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - // This is to avoid a race when we're in a native->Java transition - // racing the code which wakes up from a safepoint. - -- __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */); -+ __ safepoint_poll_acquire(safepoint_in_progress); - __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); - __ bnez(t0, safepoint_in_progress); - __ bind(safepoint_in_progress_done); -@@ -2439,7 +2439,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t - __ bind(noException); - - Label no_adjust, bail; -- if (!cause_return) { -+ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { - // If our stashed return pc was modified by the runtime we avoid touching it - __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); - __ bne(x18, t0, no_adjust); -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index 76ae6f89e27..2d4baab2ab7 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -1143,7 +1143,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { - // - // This is to avoid a race when we're in a native->Java transition - // racing the code which wakes up from a safepoint. 
-- __ safepoint_poll(L, true /* at_return */, true /* acquire */, false /* in_nmethod */); -+ __ safepoint_poll_acquire(L); - __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); - __ beqz(t1, Continue); - __ bind(L); - -From 13faeae35312c59a1366d4f9c84da7157f06efc7 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 11 Apr 2023 22:15:14 +0800 -Subject: [PATCH 039/140] Revert 8253180: ZGC: Implementation of JEP 376: ZGC: - Concurrent Thread-Stack Processing - ---- - src/hotspot/cpu/riscv/frame_riscv.cpp | 8 ++------ - src/hotspot/cpu/riscv/frame_riscv.hpp | 3 --- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1 - - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 8 -------- - .../cpu/riscv/templateInterpreterGenerator_riscv.cpp | 9 --------- - 5 files changed, 2 insertions(+), 27 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 8e4f20fe561..b056eb2488a 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -495,8 +495,8 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const { - } - - //------------------------------------------------------------------------------ --// frame::sender_raw --frame frame::sender_raw(RegisterMap* map) const { -+// frame::sender -+frame frame::sender(RegisterMap* map) const { - // Default is we done have to follow them. The sender_for_xxx will - // update it accordingly - assert(map != NULL, "map must be set"); -@@ -521,10 +521,6 @@ frame frame::sender_raw(RegisterMap* map) const { - return frame(sender_sp(), link(), sender_pc()); - } - --frame frame::sender(RegisterMap* map) const { -- return sender_raw(map); --} -- - bool frame::is_interpreted_frame_valid(JavaThread* thread) const { - assert(is_interpreted_frame(), "Not an interpreted frame"); - // These are reasonable sanity checks -diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp -index c06aaa9e391..3b88f6d5a1a 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.hpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.hpp -@@ -196,7 +196,4 @@ - - static jint interpreter_frame_expression_stack_direction() { return -1; } - -- // returns the sending frame, without applying any barriers -- frame sender_raw(RegisterMap* map) const; -- - #endif // CPU_RISCV_FRAME_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 74dded77d19..4e642af87c4 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -571,7 +571,6 @@ void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { - - // remove activation - // --// Apply stack watermark barrier. - // Unlock the receiver if this is a synchronized method. - // Unlock any Java monitors from syncronized blocks. - // Remove the activation from the stack. -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index c501c8f7bac..d740c99c979 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1565,14 +1565,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - - // check for safepoint operation in progress and/or pending suspend requests - { -- // We need an acquire here to ensure that any subsequent load of the -- // global SafepointSynchronize::_state flag is ordered after this load -- // of the thread-local polling word. 
We don't want this poll to -- // return false (i.e. not safepointing) and a later poll of the global -- // SafepointSynchronize::_state spuriously to return true. -- // This is to avoid a race when we're in a native->Java transition -- // racing the code which wakes up from a safepoint. -- - __ safepoint_poll_acquire(safepoint_in_progress); - __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); - __ bnez(t0, safepoint_in_progress); -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index 2d4baab2ab7..a07dea35b73 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -1134,15 +1134,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { - // check for safepoint operation in progress and/or pending suspend requests - { - Label L, Continue; -- -- // We need an acquire here to ensure that any subsequent load of the -- // global SafepointSynchronize::_state flag is ordered after this load -- // of the thread-local polling word. We don't want this poll to -- // return false (i.e. not safepointing) and a later poll of the global -- // SafepointSynchronize::_state spuriously to return true. -- // -- // This is to avoid a race when we're in a native->Java transition -- // racing the code which wakes up from a safepoint. - __ safepoint_poll_acquire(L); - __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); - __ beqz(t1, Continue); - -From 99ca43f1e7e74f161b40466f49fc61aa734d334d Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Wed, 12 Apr 2023 12:35:33 +0800 -Subject: [PATCH 040/140] JDK-8243155: AArch64: Add support for SqrtVF - ---- - src/hotspot/cpu/riscv/riscv.ad | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 2dde4453dac..9da8a76c190 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -7206,7 +7206,7 @@ instruct absD_reg(fRegD dst, fRegD src) %{ - %} - - instruct sqrtF_reg(fRegF dst, fRegF src) %{ -- match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); -+ match(Set dst (SqrtF src)); - - ins_cost(FSQRT_COST); - format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} - -From 4bbd814dfbc33d3f1277dbb64f19a18f9f8c1a81 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Wed, 12 Apr 2023 15:11:49 +0800 -Subject: [PATCH 041/140] Revert JDK-8267098: AArch64: C1 StubFrames end - confusingly - ---- - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 52 ++++++++++----------- - 1 file changed, 24 insertions(+), 28 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -index f523c9ed50a..1f58bde4df5 100644 ---- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -167,19 +167,14 @@ int StubAssembler::call_RT(Register oop_result, Register metadata_result, addres - return call_RT(oop_result, metadata_result, entry, arg_num); - } - --enum return_state_t { -- does_not_return, requires_return --}; -- - // Implementation of StubFrame - - class StubFrame: public StackObj { - private: - StubAssembler* _sasm; -- bool _return_state; - - public: -- StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state=requires_return); -+ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); - void load_argument(int 
offset_in_words, Register reg); - - ~StubFrame(); -@@ -197,9 +192,8 @@ void StubAssembler::epilogue() { - - #define __ _sasm-> - --StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state) { -+StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { - _sasm = sasm; -- _return_state = return_state; - __ prologue(name, must_gc_arguments); - } - -@@ -211,11 +205,7 @@ void StubFrame::load_argument(int offset_in_words, Register reg) { - - - StubFrame::~StubFrame() { -- if (_return_state == requires_return) { -- __ epilogue(); -- } else { -- __ should_not_reach_here(); -- } -+ __ epilogue(); - _sasm = NULL; - } - -@@ -378,6 +368,7 @@ OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address targe - assert_cond(oop_maps != NULL); - oop_maps->add_gc_map(call_offset, oop_map); - -+ __ should_not_reach_here(); - return oop_maps; - } - -@@ -425,7 +416,9 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { - sasm->set_frame_size(frame_size); - break; - } -- default: ShouldNotReachHere(); -+ default: -+ __ should_not_reach_here(); -+ break; - } - - // verify that only x10 and x13 are valid at this time -@@ -481,6 +474,9 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { - restore_live_registers(sasm, id != handle_exception_nofpu_id); - break; - case handle_exception_from_callee_id: -+ // Pop the return address. -+ __ leave(); -+ __ ret(); // jump to exception handler - break; - default: ShouldNotReachHere(); - } -@@ -641,13 +637,13 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case throw_div0_exception_id: - { -- StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); - oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); - } - break; - - case throw_null_pointer_exception_id: -- { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); -+ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); - oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); - } - break; -@@ -926,14 +922,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case throw_class_cast_exception_id: - { -- StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); - oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); - } - break; - - case throw_incompatible_class_change_error_id: - { -- StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); - oop_maps = generate_exception_throw(sasm, - CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); - } -@@ -1027,7 +1023,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case deoptimize_id: - { -- StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "deoptimize", dont_gc_arguments); - OopMap* oop_map = save_live_registers(sasm); - assert_cond(oop_map != NULL); - f.load_argument(0, c_rarg1); -@@ -1046,7 +1042,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - 
- case throw_range_check_failed_id: - { -- StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "range_check_failed", dont_gc_arguments); - oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); - } - break; -@@ -1062,7 +1058,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case access_field_patching_id: - { -- StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "access_field_patching", dont_gc_arguments); - // we should set up register map - oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); - } -@@ -1070,7 +1066,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case load_klass_patching_id: - { -- StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); - // we should set up register map - oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); - } -@@ -1078,7 +1074,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case load_mirror_patching_id: - { -- StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); - // we should set up register map - oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); - } -@@ -1086,7 +1082,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case load_appendix_patching_id: - { -- StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); - // we should set up register map - oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); - } -@@ -1109,14 +1105,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case throw_index_exception_id: - { -- StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); - oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); - } - break; - - case throw_array_store_exception_id: - { -- StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); - // tos + 0: link - // + 1: return address - oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); -@@ -1125,7 +1121,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case predicate_failed_trap_id: - { -- StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); - - OopMap* map = save_live_registers(sasm); - assert_cond(map != NULL); -@@ -1156,7 +1152,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - default: - { -- StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); - __ li(x10, (int) id); - __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10); - __ should_not_reach_here(); - -From eb37cfd42e7801c5ce64666c3cd25d40cfb22e76 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: 
Wed, 12 Apr 2023 18:06:40 +0800 -Subject: [PATCH 042/140] Revert JDK-8247691: [aarch64] Incorrect handling of - VM exceptions in C1 deopt stub/traps - ---- - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 87 +++++++++++++++------ - 1 file changed, 65 insertions(+), 22 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -index 1f58bde4df5..1f45fba9de0 100644 ---- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -581,37 +581,80 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { - #endif - __ reset_last_Java_frame(true); - --#ifdef ASSERT -- // Check that fields in JavaThread for exception oop and issuing pc are empty -- Label oop_empty; -- __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -- __ beqz(t0, oop_empty); -- __ stop("exception oop must be empty"); -- __ bind(oop_empty); -+ // check for pending exceptions -+ { Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, L); -+ // exception pending => remove activation and forward to exception handler - -- Label pc_empty; -- __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); -- __ beqz(t0, pc_empty); -- __ stop("exception pc must be empty"); -- __ bind(pc_empty); -+ { Label L1; -+ __ bnez(x10, L1); // have we deoptimized? -+ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); -+ __ bind(L1); -+ } -+ -+ // the deopt blob expects exceptions in the special fields of -+ // JavaThread, so copy and clear pending exception. -+ -+ // load and clear pending exception -+ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); -+ -+ // check that there is really a valid exception -+ __ verify_not_null_oop(x10); -+ -+ // load throwing pc: this is the return address of the stub -+ __ ld(x13, Address(fp, wordSize)); -+ -+#ifdef ASSERT -+ // Check that fields in JavaThread for exception oop and issuing pc are empty -+ Label oop_empty; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, oop_empty); -+ __ stop("exception oop must be empty"); -+ __ bind(oop_empty); -+ -+ Label pc_empty; -+ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); -+ __ beqz(t0, pc_empty); -+ __ stop("exception pc must be empty"); -+ __ bind(pc_empty); - #endif - -- // Runtime will return true if the nmethod has been deoptimized, this is the -- // expected scenario and anything else is an error. Note that we maintain a -- // check on the result purely as a defensive measure. -- Label no_deopt; -- __ beqz(x10, no_deopt); // Have we deoptimized? -+ // store exception oop and throwing pc to JavaThread -+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ -+ restore_live_registers(sasm); - -- // Perform a re-execute. The proper return address is already on the stack, -- // we just need to restore registers, pop all of our frames but the return -- // address and jump to the deopt blob. -+ __ leave(); -+ -+ // Forward the exception directly to deopt blob. We can blow no -+ // registers and must leave throwing pc on the stack. A patch may -+ // have values live in registers so the entry point with the -+ // exception in tls. 
-+ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); -+ -+ __ bind(L); -+ } -+ -+ // Runtime will return true if the nmethod has been deoptimized during -+ // the patching process. In that case we must do a deopt reexecute instead. -+ Label cont; -+ -+ __ beqz(x10, cont); // have we deoptimized? -+ -+ // Will reexecute. Proper return address is already on the stack we just restore -+ // registers, pop all of our frame but the return address and jump to the deopt blob - - restore_live_registers(sasm); - __ leave(); - __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); - -- __ bind(no_deopt); -- __ stop("deopt not performed"); -+ __ bind(cont); -+ restore_live_registers(sasm); -+ __ leave(); -+ __ ret(); - - return oop_maps; - } - -From 3fa279b459fffd1bd1ce158a7fdaa9d8704450a8 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 13 Apr 2023 18:29:27 +0800 -Subject: [PATCH 043/140] Revert JDK-8212681: Refactor IC locking to use a fine - grained CompiledICLocker - ---- - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 3 +-- - 2 files changed, 2 insertions(+), 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -index 75bc4be7840..4d1687301fc 100644 ---- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -113,10 +113,10 @@ void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, ad - } - - void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { -+ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); - // Reset stub. - address stub = static_stub->addr(); - assert(stub != NULL, "stub not found"); -- assert(CompiledICLocker::is_safe(stub), "mt unsafe call"); - // Creation also verifies the object. - NativeMovConstReg* method_holder - = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -index 0a05c577860..459683735e9 100644 ---- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -@@ -146,8 +146,7 @@ address NativeCall::destination() const { - // during code generation, where no patching lock is needed. - void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { - assert(!assert_lock || -- (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) || -- CompiledICLocker::is_safe(addr_at(0)), -+ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), - "concurrent code patching"); - - ResourceMark rm; - -From 727f1a8f9b4a6dfbb0cf2002f12b86b5d5f23362 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 13 Apr 2023 18:36:11 +0800 -Subject: [PATCH 044/140] Revert JDK-8225681: - vmTestbase/nsk/jvmti/RedefineClasses/StressRedefine fails due a) MT-unsafe - modification of inline cache - ---- - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -index 4d1687301fc..0b13e44c8d6 100644 ---- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -99,10 +99,15 @@ void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, ad - // Creation also verifies the object. 
- NativeMovConstReg* method_holder - = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); --#ifdef ASSERT -+#ifndef PRODUCT - NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); - -- verify_mt_safe(callee, entry, method_holder, jump); -+ // read the value once -+ volatile intptr_t data = method_holder->data(); -+ assert(data == 0 || data == (intptr_t)callee(), -+ "a) MT-unsafe modification of inline cache"); -+ assert(data == 0 || jump->jump_destination() == entry, -+ "b) MT-unsafe modification of inline cache"); - #endif - // Update stub. - method_holder->set_data((intptr_t)callee()); - -From 26e37551ecc41db0cf8eeb775a5501b4f45b4ffa Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 13 Apr 2023 18:39:52 +0800 -Subject: [PATCH 045/140] Revert JDK-8232046: AArch64 build failure after - JDK-8225681 - ---- - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 2 -- - src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 19 ++++--------------- - 2 files changed, 4 insertions(+), 17 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -index 0b13e44c8d6..1cfc92b28fa 100644 ---- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -126,8 +126,6 @@ void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_ - NativeMovConstReg* method_holder - = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); - method_holder->set_data(0); -- NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); -- jump->set_jump_destination((address)-1); - } - - //----------------------------------------------------------------------------- -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -index 459683735e9..bfe84fa4e30 100644 ---- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -@@ -272,15 +272,9 @@ address NativeJump::jump_destination() const { - - // We use jump to self as the unresolved address which the inline - // cache code (and relocs) know about -- // As a special case we also use sequence movptr_with_offset(r,0), jalr(r,0) -- // i.e. jump to 0 when we need leave space for a wide immediate -- // load -- -- // return -1 if jump to self or to 0 -- if ((dest == (address) this) || dest == 0) { -- dest = (address) -1; -- } - -+ // return -1 if jump to self -+ dest = (dest == (address) this) ? (address) -1 : dest; - return dest; - }; - -@@ -302,14 +296,9 @@ address NativeGeneralJump::jump_destination() const { - - // We use jump to self as the unresolved address which the inline - // cache code (and relocs) know about -- // As a special case we also use jump to 0 when first generating -- // a general jump -- -- // return -1 if jump to self or to 0 -- if ((dest == (address) this) || dest == 0) { -- dest = (address) -1; -- } - -+ // return -1 if jump to self -+ dest = (dest == (address) this) ? 
(address) -1 : dest; - return dest; - } - - -From 4fc68bc3cd13e623276965947d6c8cb14da15873 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 13 Apr 2023 18:47:08 +0800 -Subject: [PATCH 046/140] Revert JDK-8213084: Rework and enhance - Print[Opto]Assembly output - ---- - src/hotspot/cpu/riscv/assembler_riscv.hpp | 8 -------- - src/hotspot/cpu/riscv/disassembler_riscv.hpp | 20 -------------------- - 2 files changed, 28 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp -index 44e8d4b4ff1..b4e7287ce08 100644 ---- a/src/hotspot/cpu/riscv/assembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -268,14 +268,6 @@ class Assembler : public AbstractAssembler { - - enum { instruction_size = 4 }; - -- //---< calculate length of instruction >--- -- // We just use the values set above. -- // instruction must start at passed address -- static unsigned int instr_len(unsigned char *instr) { return instruction_size; } -- -- //---< longest instructions >--- -- static unsigned int instr_maxlen() { return instruction_size; } -- - enum RoundingMode { - rne = 0b000, // round to Nearest, ties to Even - rtz = 0b001, // round towards Zero -diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp -index b0e5560c906..06bca5298cd 100644 ---- a/src/hotspot/cpu/riscv/disassembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp -@@ -35,24 +35,4 @@ static const char* pd_cpu_opts() { - return ""; - } - --// Returns address of n-th instruction preceding addr, --// NULL if no preceding instruction can be found. --// On riscv, we assume a constant instruction length. --// It might be beneficial to check "is_readable" as we do on ppc and s390. --static address find_prev_instr(address addr, int n_instr) { -- return addr - Assembler::instruction_size * n_instr; --} -- --// special-case instruction decoding. --// There may be cases where the binutils disassembler doesn't do --// the perfect job. In those cases, decode_instruction0 may kick in --// and do it right. --// If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" --static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { -- return here; --} -- --// platform-specific instruction annotations (like value of loaded constants) --static void annotate(address pc, outputStream* st) {} -- - #endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP - -From f660c594eccb174c9779ebdc9ba40fe579aa50cc Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 13 Apr 2023 19:44:28 +0800 -Subject: [PATCH 047/140] Revert JDK-8241909: Remove useless code cache lookup - in frame::patch_pc - ---- - src/hotspot/cpu/riscv/frame_riscv.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index b056eb2488a..d03adc0bff4 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -270,7 +270,6 @@ bool frame::safe_for_sender(JavaThread *thread) { - } - - void frame::patch_pc(Thread* thread, address pc) { -- assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); - address* pc_addr = &(((address*) sp())[-1]); - if (TracePcPatching) { - tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", -@@ -280,6 +279,7 @@ void frame::patch_pc(Thread* thread, address pc) { - // patch in the same address that's already there. 
- assert(_pc == *pc_addr || pc == *pc_addr, "must be"); - *pc_addr = pc; -+ _cb = CodeCache::find_blob(pc); - address original_pc = CompiledMethod::get_deopt_original_pc(this); - if (original_pc != NULL) { - assert(original_pc == _pc, "expected original PC to be stored before patching"); - -From 0d1ed436d9b70c9244c5de42fb492bbfa5e785e8 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 16 Apr 2023 21:10:06 +0800 -Subject: [PATCH 048/140] Revert JDK-8277411: C2 fast_unlock intrinsic on - AArch64 has unnecessary ownership check & JDK-8277180: Intrinsify recursive - ObjectMonitor locking for C2 x64 and A64 - ---- - src/hotspot/cpu/riscv/riscv.ad | 24 ++++-------------------- - 1 file changed, 4 insertions(+), 20 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 9da8a76c190..c0fbda4f3f9 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2204,16 +2204,6 @@ encode %{ - __ mv(tmp, (address)markOopDesc::unused_mark()); - __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); - -- __ beqz(flag, cont); // CAS success means locking succeeded -- -- __ bne(flag, xthread, cont); // Check for recursive locking -- -- // Recursive lock case -- __ mv(flag, zr); -- __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); -- __ add(tmp, tmp, 1u); -- __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); -- - __ bind(cont); - %} - -@@ -2257,18 +2247,12 @@ encode %{ - __ bind(object_has_monitor); - STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); - __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor -+ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); - __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -+ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. -+ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions -+ __ bnez(flag, cont); - -- Label notRecursive; -- __ beqz(disp_hdr, notRecursive); // Will be 0 if not recursive. -- -- // Recursive lock -- __ addi(disp_hdr, disp_hdr, -1); -- __ sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -- __ mv(flag, zr); -- __ j(cont); -- -- __ bind(notRecursive); - __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); - __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); - __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. - -From cac7117dfc03023a81030e274944921df07bbead Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 16 Apr 2023 21:13:21 +0800 -Subject: [PATCH 049/140] Revert JDK-8210381: Obsolete EmitSync - ---- - src/hotspot/cpu/riscv/riscv.ad | 100 ++++++++++++++++++++------------- - 1 file changed, 60 insertions(+), 40 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index c0fbda4f3f9..c3ef648b21d 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2150,9 +2150,17 @@ encode %{ - // Load markWord from object into displaced_header. - __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); - -+ // Always do locking in runtime. 
-+ if (EmitSync & 0x01) { -+ __ mv(flag, 1); -+ return; -+ } -+ - // Check for existing monitor -- __ andi(t0, disp_hdr, markOopDesc::monitor_value); -- __ bnez(t0, object_has_monitor); -+ if ((EmitSync & 0x02) == 0) { -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); -+ __ bnez(t0, object_has_monitor); -+ } - - // Set tmp to be (markWord of object | UNLOCK_VALUE). - __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); -@@ -2185,24 +2193,26 @@ encode %{ - __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); - __ mv(flag, tmp); // we can use the value of tmp as the result here - -- __ j(cont); -- -- // Handle existing monitor. -- __ bind(object_has_monitor); -- // The object's monitor m is unlocked iff m->owner == NULL, -- // otherwise m->owner may contain a thread or a stack address. -- // -- // Try to CAS m->owner from NULL to current thread. -- __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); -- __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, -- Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) -- -- // Store a non-null value into the box to avoid looking like a re-entrant -- // lock. The fast-path monitor unlock code checks for -- // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the -- // relevant bit set, and also matches ObjectSynchronizer::slow_enter. -- __ mv(tmp, (address)markOopDesc::unused_mark()); -- __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ if ((EmitSync & 0x02) == 0) { -+ __ j(cont); -+ -+ // Handle existing monitor. -+ __ bind(object_has_monitor); -+ // The object's monitor m is unlocked iff m->owner == NULL, -+ // otherwise m->owner may contain a thread or a stack address. -+ // -+ // Try to CAS m->owner from NULL to current thread. -+ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); -+ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) -+ -+ // Store a non-null value into the box to avoid looking like a re-entrant -+ // lock. The fast-path monitor unlock code checks for -+ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the -+ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. -+ __ mv(tmp, (address)markOopDesc::unused_mark()); -+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ } - - __ bind(cont); - %} -@@ -2220,6 +2230,12 @@ encode %{ - - assert_different_registers(oop, box, tmp, disp_hdr, flag); - -+ // Always do locking in runtime. -+ if (EmitSync & 0x01) { -+ __ mv(flag, 1); -+ return; -+ } -+ - // Find the lock address and load the displaced header from the stack. - __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); - -@@ -2228,9 +2244,11 @@ encode %{ - __ beqz(disp_hdr, cont); - - // Handle existing monitor. 
-- __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); -- __ andi(t0, disp_hdr, markOopDesc::monitor_value); -- __ bnez(t0, object_has_monitor); -+ if ((EmitSync & 0x02) == 0) { -+ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); -+ __ bnez(t0, object_has_monitor); -+ } - - // Check if it is still a light weight lock, this is true if we - // see the stack address of the basicLock in the markWord of the -@@ -2244,23 +2262,25 @@ encode %{ - assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); - - // Handle existing monitor. -- __ bind(object_has_monitor); -- STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); -- __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor -- __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -- __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -- __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. -- __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions -- __ bnez(flag, cont); -- -- __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); -- __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); -- __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. -- __ bnez(flag, cont); -- // need a release store here -- __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -- __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -- __ sd(zr, Address(tmp)); // set unowned -+ if ((EmitSync & 0x02) == 0) { -+ __ bind(object_has_monitor); -+ STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); -+ __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor -+ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -+ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. -+ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions -+ __ bnez(flag, cont); -+ -+ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); -+ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. 
-+ __ bnez(flag, cont); -+ // need a release store here -+ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sd(zr, Address(tmp)); // set unowned -+ } - - __ bind(cont); - %} - -From ca7ab86ee886233651e1a79faff631fd7e226d57 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 16 Apr 2023 22:07:21 +0800 -Subject: [PATCH 050/140] Revert JDK-8256425: Obsolete Biased Locking in JDK 18 - ---- - src/hotspot/cpu/riscv/assembler_riscv.hpp | 2 + - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 6 +- - .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 7 +- - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 35 ++- - .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 3 +- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 27 ++- - .../cpu/riscv/macroAssembler_riscv.cpp | 217 ++++++++++++++++++ - .../cpu/riscv/macroAssembler_riscv.hpp | 28 +++ - src/hotspot/cpu/riscv/riscv.ad | 12 + - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 8 + - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 8 +- - 11 files changed, 341 insertions(+), 12 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp -index b4e7287ce08..51aa052a0c7 100644 ---- a/src/hotspot/cpu/riscv/assembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -3043,4 +3043,6 @@ enum Nf { - virtual ~Assembler() {} - }; - -+class BiasedLockingCounters; -+ - #endif // CPU_RISCV_ASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 46a20a64194..6a961ee2307 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -1511,9 +1511,13 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { - if (!UseFastLocking) { - __ j(*op->stub()->entry()); - } else if (op->code() == lir_lock) { -+ Register scratch = noreg; -+ if (UseBiasedLocking) { -+ scratch = op->scratch_opr()->as_register(); -+ } - assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); - // add debug info for NullPointerException only if one is possible -- int null_check_offset = __ lock_object(hdr, obj, lock, *op->stub()->entry()); -+ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); - if (op->info() != NULL) { - add_debug_info_for_null_check(null_check_offset, op->info()); - } -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index e126f148cdf..c45a75b2301 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -277,6 +277,11 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { - - // "lock" stores the address of the monitor stack slot, so this is not an oop - LIR_Opr lock = new_register(T_INT); -+ // Need a scratch register for biased locking -+ LIR_Opr scratch = LIR_OprFact::illegalOpr; -+ if (UseBiasedLocking) { -+ scratch = new_register(T_INT); -+ } - - CodeEmitInfo* info_for_exception = NULL; - if (x->needs_null_check()) { -@@ -285,7 +290,7 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { - // this CodeEmitInfo must not have the xhandlers because here the - // object is already locked (xhandlers expect object to be unlocked) - CodeEmitInfo* info = state_for(x, x->state(), true); -- monitor_enter(obj.result(), lock, syncTempOpr(), LIR_OprFact::illegalOpr, -+ monitor_enter(obj.result(), lock, 
syncTempOpr(), scratch, - x->monitor_no(), info_for_exception, info); - } - -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index 2d52343587e..e486f41948e 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -35,6 +35,7 @@ - #include "oops/arrayOop.hpp" - #include "oops/markWord.hpp" - #include "runtime/basicLock.hpp" -+#include "runtime/biasedLocking.hpp" - #include "runtime/os.hpp" - #include "runtime/sharedRuntime.hpp" - #include "runtime/stubRoutines.hpp" -@@ -50,7 +51,7 @@ void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, - } - } - --int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { -+int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { - const int aligned_mask = BytesPerWord - 1; - const int hdr_offset = oopDesc::mark_offset_in_bytes(); - assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); -@@ -62,7 +63,12 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr - // save object being locked into the BasicObjectLock - sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); - -- null_check_offset = offset(); -+ if (UseBiasedLocking) { -+ assert(scratch != noreg, "should have scratch register at this point"); -+ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); -+ } else { -+ null_check_offset = offset(); -+ } - - // Load object header - ld(hdr, Address(obj, hdr_offset)); -@@ -98,6 +104,10 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr - // otherwise we don't care about the result and handle locking via runtime call - bnez(hdr, slow_case, /* is_far */ true); - bind(done); -+ if (PrintBiasedLockingStatistics) { -+ la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); -+ add_memory_int32(Address(t1, 0), 1); -+ } - return null_check_offset; - } - -@@ -107,13 +117,21 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ - assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); - Label done; - -+ if (UseBiasedLocking) { -+ // load object -+ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); -+ biased_locking_exit(obj, hdr, done); -+ } -+ - // load displaced header - ld(hdr, Address(disp_hdr, 0)); - // if the loaded hdr is NULL we had recursive locking - // if we had recursive locking, we are done - beqz(hdr, done); -- // load object -- ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); -+ if (!UseBiasedLocking) { -+ // load object -+ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); -+ } - verify_oop(obj); - // test if object header is pointing to the displaced header, and if so, restore - // the displaced header in the object - if the object header is not pointing to -@@ -140,8 +158,13 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i - - void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { - assert_different_registers(obj, klass, len); -- // This assumes that all prototype bits fitr in an int32_t -- mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); -+ if (UseBiasedLocking && !len->is_valid()) { -+ 
assert_different_registers(obj, klass, len, tmp1, tmp2); -+ ld(tmp1, Address(klass, Klass::prototype_header_offset())); -+ } else { -+ // This assumes that all prototype bits fitr in an int32_t -+ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); -+ } - sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); - - if (UseCompressedClassPointers) { // Take care not to kill klass -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp -index dfd3c17d7c7..1950cee5dd5 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp -@@ -59,8 +59,9 @@ using MacroAssembler::null_check; - // hdr : must be x10, contents destroyed - // obj : must point to the object to lock, contents preserved - // disp_hdr: must point to the displaced header location, contents preserved -+ // scratch : scratch register, contents destroyed - // returns code offset at which to add null check debug information -- int lock_object (Register swap, Register obj, Register disp_hdr, Label& slow_case); -+ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); - - // unlocking - // hdr : contents destroyed -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 4e642af87c4..f0c249f0d26 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -39,6 +39,7 @@ - #include "prims/jvmtiExport.hpp" - #include "prims/jvmtiThreadState.hpp" - #include "runtime/basicLock.hpp" -+#include "runtime/biasedLocking.hpp" - #include "runtime/frame.inline.hpp" - #include "runtime/safepointMechanism.hpp" - #include "runtime/sharedRuntime.hpp" -@@ -782,6 +783,10 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) - // Load object pointer into obj_reg c_rarg3 - ld(obj_reg, Address(lock_reg, obj_offset)); - -+ if (UseBiasedLocking) { -+ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); -+ } -+ - // Load (object->mark() | 1) into swap_reg - ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - ori(swap_reg, t0, 1); -@@ -792,7 +797,17 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) - assert(lock_offset == 0, - "displached header must be first word in BasicObjectLock"); - -- cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); -+ if (PrintBiasedLockingStatistics) { -+ Label fail, fast; -+ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail); -+ bind(fast); -+ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), -+ t1, t0); -+ j(done); -+ bind(fail); -+ } else { -+ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); -+ } - - // Test if the oopMark is an obvious stack pointer, i.e., - // 1) (mark & 7) == 0, and -@@ -809,6 +824,12 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) - - // Save the test result, for recursive case, the result is zero - sd(swap_reg, Address(lock_reg, mark_offset)); -+ -+ if (PrintBiasedLockingStatistics) { -+ bnez(swap_reg, slow_case); -+ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), -+ t1, t0); -+ } - beqz(swap_reg, done); - - bind(slow_case); -@@ -861,6 +882,10 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) - // Free entry - sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); - -+ if (UseBiasedLocking) { -+ 
biased_locking_exit(obj_reg, header_reg, done); -+ } -+ - // Load the old header from BasicLock structure - ld(header_reg, Address(swap_reg, - BasicLock::displaced_header_offset_in_bytes())); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 73629e3dba3..e557a134b5b 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -41,6 +41,7 @@ - #include "oops/compressedOops.inline.hpp" - #include "oops/klass.inline.hpp" - #include "oops/oop.hpp" -+#include "runtime/biasedLocking.hpp" - #include "runtime/interfaceSupport.inline.hpp" - #include "runtime/jniHandles.inline.hpp" - #include "runtime/sharedRuntime.hpp" -@@ -2791,6 +2792,222 @@ void MacroAssembler::reserved_stack_check() { - bind(no_reserved_zone_enabling); - } - -+void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { -+ Label retry_load; -+ bind(retry_load); -+ // flush and load exclusive from the memory location -+ lr_w(tmp, counter_addr); -+ addw(tmp, tmp, 1); -+ // if we store+flush with no intervening write tmp wil be zero -+ sc_w(tmp, tmp, counter_addr); -+ bnez(tmp, retry_load); -+} -+ -+void MacroAssembler::load_prototype_header(Register dst, Register src) { -+ load_klass(dst, src); -+ ld(dst, Address(dst, Klass::prototype_header_offset())); -+} -+ -+int MacroAssembler::biased_locking_enter(Register lock_reg, -+ Register obj_reg, -+ Register swap_reg, -+ Register tmp_reg, -+ bool swap_reg_contains_mark, -+ Label& done, -+ Label* slow_case, -+ BiasedLockingCounters* counters, -+ Register flag) { -+ assert(UseBiasedLocking, "why call this otherwise?"); -+ assert_different_registers(lock_reg, obj_reg, swap_reg); -+ -+ if (PrintBiasedLockingStatistics && counters == NULL) -+ counters = BiasedLocking::counters(); -+ -+ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0); -+ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); -+ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); -+ -+ // Biased locking -+ // See whether the lock is currently biased toward our thread and -+ // whether the epoch is still valid -+ // Note that the runtime guarantees sufficient alignment of JavaThread -+ // pointers to allow age to be placed into low bits -+ // First check to see whether biasing is even enabled for this object -+ Label cas_label; -+ int null_check_offset = -1; -+ if (!swap_reg_contains_mark) { -+ null_check_offset = offset(); -+ ld(swap_reg, mark_addr); -+ } -+ andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); -+ li(t0, markOopDesc::biased_lock_pattern); -+ bne(t0, tmp_reg, cas_label); -+ // The bias pattern is present in the object's header. Need to check -+ // whether the bias owner and the epoch are both still current. -+ load_prototype_header(tmp_reg, obj_reg); -+ orr(tmp_reg, tmp_reg, xthread); -+ xorr(tmp_reg, swap_reg, tmp_reg); -+ andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); -+ if (flag->is_valid()) { -+ mv(flag, tmp_reg); -+ } -+ if (counters != NULL) { -+ Label around; -+ bnez(tmp_reg, around); -+ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); -+ j(done); -+ bind(around); -+ } else { -+ beqz(tmp_reg, done); -+ } -+ -+ Label try_revoke_bias; -+ Label try_rebias; -+ -+ // At this point we know that the header has the bias pattern and -+ // that we are not the bias owner in the current epoch. 
We need to -+ // figure out more details about the state of the header in order to -+ // know what operations can be legally performed on the object's -+ // header. -+ -+ // If the low three bits in the xor result aren't clear, that means -+ // the prototype header is no longer biased and we have to revoke -+ // the bias on this object. -+ andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); -+ bnez(t0, try_revoke_bias); -+ -+ // Biasing is still enabled for this data type. See whether the -+ // epoch of the current bias is still valid, meaning that the epoch -+ // bits of the mark word are equal to the epoch bits of the -+ // prototype header. (Note that the prototype header's epoch bits -+ // only change at a safepoint.) If not, attempt to rebias the object -+ // toward the current thread. Note that we must be absolutely sure -+ // that the current epoch is invalid in order to do this because -+ // otherwise the manipulations it performs on the mark word are -+ // illegal. -+ andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); -+ bnez(t0, try_rebias); -+ -+ // The epoch of the current bias is still valid but we know nothing -+ // about the owner; it might be set or it might be clear. Try to -+ // acquire the bias of the object using an atomic operation. If this -+ // fails we will go in to the runtime to revoke the object's bias. -+ // Note that we first construct the presumed unbiased header so we -+ // don't accidentally blow away another thread's valid bias. -+ { -+ Label cas_success; -+ Label counter; -+ mv(t0, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); -+ andr(swap_reg, swap_reg, t0); -+ orr(tmp_reg, swap_reg, xthread); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); -+ // cas failed here if slow_cass == NULL -+ if (flag->is_valid()) { -+ mv(flag, 1); -+ j(counter); -+ } -+ // If the biasing toward our thread failed, this means that -+ // another thread succeeded in biasing it toward itself and we -+ // need to revoke that bias. The revocation will occur in the -+ // interpreter runtime in the slow case. -+ bind(cas_success); -+ if (flag->is_valid()) { -+ mv(flag, 0); -+ bind(counter); -+ } -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), -+ tmp_reg, t0); -+ } -+ } -+ j(done); -+ -+ bind(try_rebias); -+ // At this point we know the epoch has expired, meaning that the -+ // current "bias owner", if any, is actually invalid. Under these -+ // circumstances _only_, we are allowed to use the current header's -+ // value as the comparison value when doing the cas to acquire the -+ // bias in the current epoch. In other words, we allow transfer of -+ // the bias from one thread to another directly in this situation. -+ // -+ // FIXME: due to a lack of registers we currently blow away the age -+ // bits in this situation. Should attempt to preserve them. -+ { -+ Label cas_success; -+ Label counter; -+ load_prototype_header(tmp_reg, obj_reg); -+ orr(tmp_reg, xthread, tmp_reg); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); -+ // cas failed here if slow_cass == NULL -+ if (flag->is_valid()) { -+ mv(flag, 1); -+ j(counter); -+ } -+ -+ // If the biasing toward our thread failed, then another thread -+ // succeeded in biasing it toward itself and we need to revoke that -+ // bias. The revocation will occur in the runtime in the slow case. 
-+ bind(cas_success); -+ if (flag->is_valid()) { -+ mv(flag, 0); -+ bind(counter); -+ } -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), -+ tmp_reg, t0); -+ } -+ } -+ j(done); -+ -+ bind(try_revoke_bias); -+ // The prototype mark in the klass doesn't have the bias bit set any -+ // more, indicating that objects of this data type are not supposed -+ // to be biased any more. We are going to try to reset the mark of -+ // this object to the prototype value and fall through to the -+ // CAS-based locking scheme. Note that if our CAS fails, it means -+ // that another thread raced us for the privilege of revoking the -+ // bias of this particular object, so it's okay to continue in the -+ // normal locking code. -+ // -+ // FIXME: due to a lack of registers we currently blow away the age -+ // bits in this situation. Should attempt to preserve them. -+ { -+ Label cas_success, nope; -+ load_prototype_header(tmp_reg, obj_reg); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); -+ bind(cas_success); -+ -+ // Fall through to the normal CAS-based lock, because no matter what -+ // the result of the above CAS, some thread must have succeeded in -+ // removing the bias bit from the object's header. -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, -+ t0); -+ } -+ bind(nope); -+ } -+ -+ bind(cas_label); -+ -+ return null_check_offset; -+} -+ -+void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { -+ assert(UseBiasedLocking, "why call this otherwise?"); -+ -+ // Check for biased locking unlock case, which is a no-op -+ // Note: we do not have to check the thread ID for two reasons. -+ // First, the interpreter checks for IllegalMonitorStateException at -+ // a higher level. Second, if the bias was revoked while we held the -+ // lock, the object could not be rebiased toward another thread, so -+ // the bias bit would be clear. -+ ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); -+ sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); -+ if (flag->is_valid()) { mv(flag, tmp_reg); } -+ beqz(tmp_reg, done); -+} -+ - // Move the address of the polling page into dest. - void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { - if (SafepointMechanism::uses_thread_local_poll()) { -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 8a2c6e07d88..c1ffa120774 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -47,6 +47,32 @@ class MacroAssembler: public Assembler { - void safepoint_poll(Label& slow_path); - void safepoint_poll_acquire(Label& slow_path); - -+ // Biased locking support -+ // lock_reg and obj_reg must be loaded up with the appropriate values. -+ // swap_reg is killed. -+ // tmp_reg must be supplied and must not be rscratch1 or rscratch2 -+ // Optional slow case is for implementations (interpreter and C1) which branch to -+ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. -+ // Returns offset of first potentially-faulting instruction for null -+ // check info (currently consumed only by C1). 
If -+ // swap_reg_contains_mark is true then returns -1 as it is assumed -+ // the calling code has already passed any potential faults. -+ int biased_locking_enter(Register lock_reg, Register obj_reg, -+ Register swap_reg, Register tmp_reg, -+ bool swap_reg_contains_mark, -+ Label& done, Label* slow_case = NULL, -+ BiasedLockingCounters* counters = NULL, -+ Register flag = noreg); -+ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done, Register flag = noreg); -+ -+ // Helper functions for statistics gathering. -+ // Unconditional atomic increment. -+ void atomic_incw(Register counter_addr, Register tmp); -+ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { -+ la(tmp1, counter_addr); -+ atomic_incw(tmp1, tmp2); -+ } -+ - // Place a fence.i after code may have been modified due to a safepoint. - void safepoint_ifence(); - -@@ -225,6 +251,8 @@ class MacroAssembler: public Assembler { - // stored using routines that take a jobject. - void store_heap_oop_null(Address dst); - -+ void load_prototype_header(Register dst, Register src); -+ - // This dummy is to prevent a call to store_heap_oop from - // converting a zero (linke NULL) into a Register by giving - // the compiler two choices it can't resolve -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index c3ef648b21d..c2a0be140e9 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2156,6 +2156,10 @@ encode %{ - return; - } - -+ if (UseBiasedLocking && !UseOptoBiasInlining) { -+ __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag); -+ } -+ - // Check for existing monitor - if ((EmitSync & 0x02) == 0) { - __ andi(t0, disp_hdr, markOopDesc::monitor_value); -@@ -2236,6 +2240,10 @@ encode %{ - return; - } - -+ if (UseBiasedLocking && !UseOptoBiasInlining) { -+ __ biased_locking_exit(oop, tmp, cont, flag); -+ } -+ - // Find the lock address and load the displaced header from the stack. - __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); - -@@ -4961,6 +4969,10 @@ instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFla - ins_pipe(pipe_serial); - %} - -+// storeLConditional is used by PhaseMacroExpand::expand_lock_node -+// when attempting to rebias a lock towards the current thread. We -+// must use the acquire form of cmpxchg in order to guarantee acquire -+// semantics in this case. 
- instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) - %{ - match(Set cr (StoreLConditional mem (Binary oldval newval))); -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index d740c99c979..eaefcc2b595 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1489,6 +1489,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - // Load the oop from the handle - __ ld(obj_reg, Address(oop_handle_reg, 0)); - -+ if (UseBiasedLocking) { -+ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); -+ } -+ - // Load (object->mark() | 1) into swap_reg % x10 - __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - __ ori(swap_reg, t0, 1); -@@ -1597,6 +1601,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - - Label done; - -+ if (UseBiasedLocking) { -+ __ biased_locking_exit(obj_reg, old_hdr, done); -+ } -+ - // Simple recursive lock? - __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); - __ beqz(t0, done); -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index c9d399ccdaf..1e23fb4dc09 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -3563,9 +3563,13 @@ void TemplateTable::_new() { - __ bnez(x13, loop); - } - -- // initialize object hader only. -+ // initialize object header only. - __ bind(initialize_header); -- __ mv(t0, (intptr_t)markOopDesc::prototype()); -+ if (UseBiasedLocking) { -+ __ ld(t0, Address(x14, Klass::prototype_header_offset())); -+ } else { -+ __ mv(t0, (intptr_t)markOopDesc::prototype()); -+ } - __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); - __ store_klass_gap(x10, zr); // zero klass gap for compressed oops - __ store_klass(x10, x14); // store klass last - -From 864e551505bb816f3dc8a3bd1b065328ba7b5d65 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Mon, 17 Apr 2023 19:52:44 +0800 -Subject: [PATCH 051/140] Revert JDK-8227680: FastJNIAccessors: Check for JVMTI - field access event requests at runtime - ---- - .../cpu/riscv/jniFastGetField_riscv.cpp | 32 ++++--------------- - 1 file changed, 6 insertions(+), 26 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp -index 814ed23e471..f6e7351c4fc 100644 ---- a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp -+++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp -@@ -83,28 +83,10 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { - // An even value means there are no ongoing safepoint operations - __ andi(t0, rcounter, 1); - __ bnez(t0, slow); -- -- if (JvmtiExport::can_post_field_access()) { -- // Using barrier to order wrt. JVMTI check and load of result. -- __ membar(MacroAssembler::LoadLoad); -- -- // Check to see if a field access watch has been set before we -- // take the fast path. -- int32_t offset2; -- __ la_patchable(result, -- ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), -- offset2); -- __ lwu(result, Address(result, offset2)); -- __ bnez(result, slow); -- -- __ mv(robj, c_rarg1); -- } else { -- // Using address dependency to order wrt. load of result. 
-- __ xorr(robj, c_rarg1, rcounter); -- __ xorr(robj, robj, rcounter); // obj, since -- // robj ^ rcounter ^ rcounter == robj -- // robj is address dependent on rcounter. -- } -+ __ xorr(robj, c_rarg1, rcounter); -+ __ xorr(robj, robj, rcounter); // obj, since -+ // robj ^ rcounter ^ rcounter == robj -+ // robj is address dependent on rcounter. - - // Both robj and t0 are clobbered by try_resolve_jobject_in_native. - BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -@@ -137,10 +119,8 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { - default: ShouldNotReachHere(); - } - -- // Using acquire: Order JVMTI check and load of result wrt. succeeding check -- // (LoadStore for volatile field). -- __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -- -+ __ xorr(rcounter_addr, rcounter_addr, result); -+ __ xorr(rcounter_addr, rcounter_addr, result); - __ lw(t0, safepoint_counter_addr); - __ bne(rcounter, t0, slow); - - -From b822b64cb6be38cb7806fda3d56675674557c163 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 18 Apr 2023 16:34:32 +0800 -Subject: [PATCH 052/140] Revert JDK-8249768: Move static oops and - NullPointerException oops from Universe into OopStorage - ---- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index 1e23fb4dc09..fbcdcf60d9c 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -411,7 +411,6 @@ void TemplateTable::fast_aldc(bool wide) - int32_t offset = 0; - __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); - __ ld(tmp, Address(rarg, offset)); -- __ resolve_oop_handle(tmp); - __ bne(result, tmp, notNull); - __ mv(result, zr); // NULL object reference - __ bind(notNull); - -From c82c482aa065ffd39eab6b87a0ad6c6cbca1e3af Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 18 Apr 2023 16:58:23 +0800 -Subject: [PATCH 053/140] Revert JDK-8217998: Remove method_type field - associated with the appendix field of an indy or method handle call - ---- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index fbcdcf60d9c..158294f7436 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -3192,6 +3192,7 @@ void TemplateTable::prepare_invoke(int byte_no, - // since the parameter_size includes it. - __ push_reg(x9); - __ mv(x9, index); -+ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); - __ load_resolved_reference_at_index(index, x9); - __ pop_reg(x9); - __ push_reg(index); // push appendix (MethodType, CallSite, etc.) 
- -From 3e50d62dd06c3f8bc586e3ab2b00f2f587d950bf Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:04:31 +0800 -Subject: [PATCH 054/140] Revert JDK-8277372: Add getters for BOT and card - table members - ---- - src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 4 ++-- - .../riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp | 6 +++--- - 2 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -index 1c46b3947d3..6b75bf63781 100644 ---- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -215,7 +215,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, - ExternalAddress cardtable((address) ct->byte_map_base()); - const Register card_addr = tmp; - -- __ srli(card_addr, store_addr, CardTable::card_shift()); -+ __ srli(card_addr, store_addr, CardTable::card_shift); - - // get the address of the card - __ load_byte_map_base(tmp2); -@@ -437,7 +437,7 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* - assert_different_registers(card_offset, byte_map_base, t0); - - __ load_parameter(0, card_offset); -- __ srli(card_offset, card_offset, CardTable::card_shift()); -+ __ srli(card_offset, card_offset, CardTable::card_shift); - __ load_byte_map_base(byte_map_base); - - // Convert card offset into an address in card_addr -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -index a419f92b5f6..868d022ac74 100644 ---- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -@@ -41,7 +41,7 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob - BarrierSet* bs = BarrierSet::barrier_set(); - assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); - -- __ srli(obj, obj, CardTable::card_shift()); -+ __ srli(obj, obj, CardTable::card_shift); - - assert(CardTable::dirty_card_val() == 0, "must be"); - -@@ -74,8 +74,8 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl - __ shadd(end, count, start, count, LogBytesPerHeapOop); - __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive - -- __ srli(start, start, CardTable::card_shift()); -- __ srli(end, end, CardTable::card_shift()); -+ __ srli(start, start, CardTable::card_shift); -+ __ srli(end, end, CardTable::card_shift); - __ sub(count, end, start); // number of bytes to copy - - __ load_byte_map_base(tmp); - -From 6a81a820e6c08cfdd8e29a835e953dabffdca98a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Wed, 19 Apr 2023 11:30:58 +0800 -Subject: [PATCH 055/140] Revert JDK-8260941: Remove the conc_scan parameter - for CardTable - ---- - .../shared/cardTableBarrierSetAssembler_riscv.cpp | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -index 868d022ac74..a476e5ec84d 100644 ---- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -@@ -41,6 +41,9 @@ void 
CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob - BarrierSet* bs = BarrierSet::barrier_set(); - assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); - -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); -+ - __ srli(obj, obj, CardTable::card_shift); - - assert(CardTable::dirty_card_val() == 0, "must be"); -@@ -56,6 +59,9 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob - __ sb(zr, Address(tmp)); - __ bind(L_already_dirty); - } else { -+ if (ct->scanned_concurrently()) { -+ __ membar(MacroAssembler::StoreStore); -+ } - __ sb(zr, Address(tmp)); - } - } -@@ -66,6 +72,10 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl - assert_different_registers(start, tmp); - assert_different_registers(count, tmp); - -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); -+ - Label L_loop, L_done; - const Register end = count; - -@@ -80,6 +90,9 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl - - __ load_byte_map_base(tmp); - __ add(start, start, tmp); -+ if (ct->scanned_concurrently()) { -+ __ membar(MacroAssembler::StoreStore); -+ } - - __ bind(L_loop); - __ add(tmp, start, count); - -From 24688cb665b16331b491bed2566dc97582a3d73c Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Wed, 19 Apr 2023 11:32:54 +0800 -Subject: [PATCH 056/140] Revert JDK-8220301: Remove jbyte use in CardTable - -Note: An assertion in `CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier` is removed. See the jdk11u backport for AArch64: https://mail.openjdk.org/pipermail/jdk-updates-dev/2019-August/001746.html ---- - src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 3 +++ - .../cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp | 1 + - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/riscv.ad | 3 +-- - 4 files changed, 6 insertions(+), 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -index 6b75bf63781..b6786c6b327 100644 ---- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -196,6 +196,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, - BarrierSet* bs = BarrierSet::barrier_set(); - CardTableBarrierSet* ctbs = barrier_set_cast(bs); - CardTable* ct = ctbs->card_table(); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); - - Label done; - Label runtime; -@@ -213,6 +214,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, - // storing region crossing non-NULL, is card already dirty? 
- - ExternalAddress cardtable((address) ct->byte_map_base()); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); - const Register card_addr = tmp; - - __ srli(card_addr, store_addr, CardTable::card_shift); -@@ -419,6 +421,7 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* - BarrierSet* bs = BarrierSet::barrier_set(); - CardTableBarrierSet* ctbs = barrier_set_cast(bs); - CardTable* ct = ctbs->card_table(); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); - - Label done; - Label runtime; -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -index a476e5ec84d..81d47d61d4c 100644 ---- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -@@ -43,6 +43,7 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob - - CardTableBarrierSet* ctbs = barrier_set_cast(bs); - CardTable* ct = ctbs->card_table(); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); - - __ srli(obj, obj, CardTable::card_shift); - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index e557a134b5b..6e4d22db40f 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -2719,7 +2719,7 @@ void MacroAssembler::get_thread(Register thread) { - } - - void MacroAssembler::load_byte_map_base(Register reg) { -- CardTable::CardValue* byte_map_base = -+ jbyte *byte_map_base = - ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); - li(reg, (uint64_t)byte_map_base); - } -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index c2a0be140e9..ca6a232e1e0 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2735,8 +2735,7 @@ operand immByteMapBase() - %{ - // Get base of card map - predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && -- (CardTable::CardValue*)n->get_ptr() == -- ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); -+ (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); - match(ConP); - - op_cost(0); - -From 6ee27261d406342a5378d4a404319866a9bae804 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Wed, 19 Apr 2023 11:51:20 +0800 -Subject: [PATCH 057/140] Revert JDK-8230486: - G1BarrierSetAssembler::g1_write_barrier_post unnecessarily pushes/pops - new_val - ---- - src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -index b6786c6b327..d724876ec3a 100644 ---- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -250,7 +250,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, - - __ bind(runtime); - // save the live input values -- RegSet saved = RegSet::of(store_addr); -+ RegSet saved = RegSet::of(store_addr, new_val); - __ push_reg(saved, sp); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); - __ 
pop_reg(saved, sp); - -From 57067a358ffc1b54edfb305549bd460b0fca47f0 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Fri, 21 Apr 2023 12:10:22 +0800 -Subject: [PATCH 058/140] Revert JDK-8242449: AArch64: r27 can be allocated in - CompressedOops mode - ---- - src/hotspot/cpu/riscv/riscv.ad | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index ca6a232e1e0..e3f976faa0d 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -4846,6 +4846,8 @@ instruct storeN(iRegN src, memory mem) - instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) - %{ - match(Set mem (StoreN mem zero)); -+ predicate(Universe::narrow_oop_base() == NULL && -+ Universe::narrow_klass_base() == NULL); - - ins_cost(STORE_COST); - format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} - -From 0db520768d4d268a9dc641e301df45653c52f6eb Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 23 Apr 2023 14:59:09 +0800 -Subject: [PATCH 059/140] A fix for interpreter frame verification code, - skipping the locals check if there is no locals. See one of the additional - commits in JDK-8286301, the RISC-V loom port. - ---- - src/hotspot/cpu/riscv/frame_riscv.cpp | 11 ++++++++++- - 1 file changed, 10 insertions(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index d03adc0bff4..13c482b610a 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -571,7 +571,16 @@ bool frame::is_interpreted_frame_valid(JavaThread* thread) const { - - // validate locals - address locals = (address) *interpreter_frame_locals_addr(); -- if (locals > thread->stack_base() || locals < (address) fp()) { -+ if (locals > thread->stack_base()) { -+ return false; -+ } -+ -+ if (m->max_locals() > 0 && locals < (address) fp()) { -+ // fp in interpreter frame on RISC-V is higher than that on AArch64, -+ // pointing to sender_sp and sender_sp-2 relatively. -+ // On RISC-V, if max_locals is 0, the 'locals' pointer may be below fp, -+ // pointing to sender_sp-1 (with one padding slot). -+ // So we verify the 'locals' pointer only if max_locals > 0. 
- return false; - } - - -From 795da5afe59658b4d89cd8501b4f4ec56471b14c Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 11 Apr 2023 11:45:40 +0800 -Subject: [PATCH 060/140] ShenandoahGC adaptations on JDK11 for RISC-V backend - ---- - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 4 +- - .../c1/shenandoahBarrierSetC1_riscv.cpp | 2 +- - .../shenandoahBarrierSetAssembler_riscv.cpp | 229 +++++++++--------- - .../shenandoahBarrierSetAssembler_riscv.hpp | 15 +- - .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 88 ------- - src/hotspot/cpu/riscv/riscv.ad | 6 +- - .../templateInterpreterGenerator_riscv.cpp | 15 +- - 7 files changed, 146 insertions(+), 213 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 6a961ee2307..90c4af5d3b0 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -1817,10 +1817,12 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { - - - void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { -- if (patch_code != lir_patch_none) { -+#if INCLUDE_SHENANDOAHGC -+ if (UseShenandoahGC && patch_code != lir_patch_none) { - deoptimize_trap(info); - return; - } -+#endif - - assert(patch_code == lir_patch_none, "Patch code not supported"); - LIR_Address* adr = addr->as_address_ptr(); -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -index cd568cc723f..d19f5b859ce 100644 ---- a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -@@ -103,7 +103,7 @@ LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRIt - __ xchg(access.resolved_addr(), value_opr, result, tmp); - - if (access.is_oop()) { -- result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators()); -+ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0)); - LIR_Opr tmp_opr = gen->new_register(type); - __ move(result, tmp_opr); - result = tmp_opr; -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -index 84e1205bc25..b8534c52e77 100644 ---- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -@@ -27,7 +27,7 @@ - #include "gc/shenandoah/shenandoahBarrierSet.hpp" - #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" - #include "gc/shenandoah/shenandoahForwarding.hpp" --#include "gc/shenandoah/shenandoahHeap.inline.hpp" -+#include "gc/shenandoah/shenandoahHeap.hpp" - #include "gc/shenandoah/shenandoahHeapRegion.hpp" - #include "gc/shenandoah/shenandoahRuntime.hpp" - #include "gc/shenandoah/shenandoahThreadLocalData.hpp" -@@ -44,6 +44,8 @@ - - #define __ masm-> - -+address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; -+ - void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, - Register src, Register dst, Register count, RegSet saved_regs) { - if (is_oop) { -@@ -116,10 +118,10 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, - Address buffer(thread, 
in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); - - // Is marking active? -- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { -+ if (in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 4) { - __ lwu(tmp, in_progress); - } else { -- assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ assert(in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); - __ lbu(tmp, in_progress); - } - __ beqz(tmp, done); -@@ -225,37 +227,21 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb - __ pop_reg(saved_regs, sp); - } - --void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, -- Register dst, -- Address load_addr, -- DecoratorSet decorators) { -+void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, -+ Register dst, -+ Address load_addr) { - assert(ShenandoahLoadRefBarrier, "Should be enabled"); - assert(dst != t1 && load_addr.base() != t1, "need t1"); - assert_different_registers(load_addr.base(), t0, t1); - -- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); -- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); -- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); -- bool is_native = ShenandoahBarrierSet::is_native_access(decorators); -- bool is_narrow = UseCompressedOops && !is_native; -- -- Label heap_stable, not_cset; -+ Label done; - __ enter(); - Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); - __ lbu(t1, gc_state); - - // Check for heap stability -- if (is_strong) { -- __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); -- __ beqz(t1, heap_stable); -- } else { -- Label lrb; -- __ andi(t0, t1, ShenandoahHeap::WEAK_ROOTS); -- __ bnez(t0, lrb); -- __ andi(t0, t1, ShenandoahHeap::HAS_FORWARDED); -- __ beqz(t0, heap_stable); -- __ bind(lrb); -- } -+ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); -+ __ beqz(t1, done); - - // use x11 for load address - Register result_dst = dst; -@@ -270,43 +256,12 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, - __ la(x11, load_addr); - __ mv(x10, dst); - -- // Test for in-cset -- if (is_strong) { -- __ li(t1, (uint64_t)ShenandoahHeap::in_cset_fast_test_addr()); -- __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -- __ add(t1, t1, t0); -- __ lbu(t1, Address(t1)); -- __ andi(t0, t1, 1); -- __ beqz(t0, not_cset); -- } -+ __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); - -- __ push_call_clobbered_registers(); -- if (is_strong) { -- if (is_narrow) { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); -- } else { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); -- } -- } else if (is_weak) { -- if (is_narrow) { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); -- } else { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); -- } -- } else { -- assert(is_phantom, "only remaining strength"); -- assert(!is_narrow, "phantom access cannot be narrow"); -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); -- } -- __ jalr(ra); -- __ mv(t0, x10); -- __ pop_call_clobbered_registers(); -- __ mv(x10, t0); -- __ bind(not_cset); - __ mv(result_dst, x10); - __ pop_reg(saved_regs, sp); - -- __ bind(heap_stable); -+ __ bind(done); - __ 
leave(); - } - -@@ -320,6 +275,15 @@ void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register ds - } - } - -+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) { -+ if (ShenandoahLoadRefBarrier) { -+ Label is_null; -+ __ beqz(dst, is_null); -+ load_reference_barrier_not_null(masm, dst, load_addr); -+ __ bind(is_null); -+ } -+} -+ - // - // Arguments: - // -@@ -363,7 +327,7 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, - - BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); - -- load_reference_barrier(masm, dst, src, decorators); -+ load_reference_barrier(masm, dst, src); - - if (dst != result_dst) { - __ mv(result_dst, dst); -@@ -555,7 +519,7 @@ void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, Shen - Register pre_val_reg = stub->pre_val()->as_register(); - - if (stub->do_load()) { -- ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); - } - __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); - ce->store_parameter(stub->pre_val()->as_register(), 0); -@@ -568,12 +532,6 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble - ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); - __ bind(*stub->entry()); - -- DecoratorSet decorators = stub->decorators(); -- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); -- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); -- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); -- bool is_native = ShenandoahBarrierSet::is_native_access(decorators); -- - Register obj = stub->obj()->as_register(); - Register res = stub->result()->as_register(); - Register addr = stub->addr()->as_pointer_register(); -@@ -587,30 +545,32 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble - __ mv(res, obj); - } - -- if (is_strong) { -- // Check for object in cset. -- __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); -- __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -- __ add(tmp2, tmp2, tmp1); -- __ lbu(tmp2, Address(tmp2)); -- __ beqz(tmp2, *stub->continuation(), true /* is_far */); -- } -+ // Check for null. -+ __ beqz(res, *stub->continuation(), /* is_far */ true); -+ -+ // Check for object in cset. -+ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); -+ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -+ __ add(t0, tmp2, tmp1); -+ __ lb(tmp2, Address(t0)); -+ __ beqz(tmp2, *stub->continuation(), /* is_far */ true); -+ -+ // Check if object is already forwarded. -+ Label slow_path; -+ __ ld(tmp1, Address(res, oopDesc::mark_offset_in_bytes())); -+ __ xori(tmp1, tmp1, -1); -+ __ andi(t0, tmp1, markOopDesc::lock_mask_in_place); -+ __ bnez(t0, slow_path); -+ -+ // Decode forwarded object. 
-+ __ ori(tmp1, tmp1, markOopDesc::marked_value); -+ __ xori(res, tmp1, -1); -+ __ j(*stub->continuation()); - -+ __ bind(slow_path); - ce->store_parameter(res, 0); - ce->store_parameter(addr, 1); -- -- if (is_strong) { -- if (is_native) { -- __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin())); -- } else { -- __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin())); -- } -- } else if (is_weak) { -- __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin())); -- } else { -- assert(is_phantom, "only remaining strength"); -- __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin())); -- } -+ __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin())); - - __ j(*stub->continuation()); - } -@@ -664,8 +624,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss - __ epilogue(); - } - --void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, -- DecoratorSet decorators) { -+void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) { - __ prologue("shenandoah_load_reference_barrier", false); - // arg0 : object to be resolved - -@@ -673,31 +632,10 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s - __ load_parameter(0, x10); - __ load_parameter(1, x11); - -- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); -- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); -- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); -- bool is_native = ShenandoahBarrierSet::is_native_access(decorators); -- if (is_strong) { -- if (is_native) { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); -- } else { -- if (UseCompressedOops) { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); -- } else { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); -- } -- } -- } else if (is_weak) { -- assert(!is_native, "weak must not be called off-heap"); -- if (UseCompressedOops) { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); -- } else { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); -- } -+ if (UseCompressedOops) { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); - } else { -- assert(is_phantom, "only remaining strength"); -- assert(is_native, "phantom must only be called off-heap"); -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_phantom); -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); - } - __ jalr(ra); - __ mv(t0, x10); -@@ -710,3 +648,68 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s - #undef __ - - #endif // COMPILER1 -+ -+address ShenandoahBarrierSetAssembler::shenandoah_lrb() { -+ assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); -+ return _shenandoah_lrb; -+} -+ -+#define __ cgen->assembler()-> -+ -+// Shenandoah load reference barrier. -+// -+// Input: -+// x10: OOP to evacuate. Not null. -+// x11: load address -+// -+// Output: -+// x10: Pointer to evacuated OOP. -+// -+// Trash t0 t1 Preserve everything else. 
-+address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { -+ __ align(6); -+ StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); -+ address start = __ pc(); -+ -+ Label slow_path; -+ __ mv(t1, ShenandoahHeap::in_cset_fast_test_addr()); -+ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -+ __ add(t1, t1, t0); -+ __ lbu(t1, Address(t1, 0)); -+ __ andi(t0, t1, 1); -+ __ bnez(t0, slow_path); -+ __ ret(); -+ -+ __ bind(slow_path); -+ __ enter(); // required for proper stackwalking of RuntimeStub frame -+ -+ __ push_call_clobbered_registers(); -+ -+ if (UseCompressedOops) { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); -+ } else { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); -+ } -+ __ jalr(ra); -+ __ mv(t0, x10); -+ __ pop_call_clobbered_registers(); -+ __ mv(x10, t0); -+ -+ __ leave(); // required for proper stackwalking of RuntimeStub frame -+ __ ret(); -+ -+ return start; -+} -+ -+#undef __ -+ -+void ShenandoahBarrierSetAssembler::barrier_stubs_init() { -+ if (ShenandoahLoadRefBarrier) { -+ int stub_code_size = 2048; -+ ResourceMark rm; -+ BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); -+ CodeBuffer buf(bb); -+ StubCodeGenerator cgen(&buf); -+ _shenandoah_lrb = generate_shenandoah_lrb(&cgen); -+ } -+} -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -index a705f497667..5d75035e9d4 100644 ---- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -@@ -40,6 +40,8 @@ class StubCodeGenerator; - class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { - private: - -+ static address _shenandoah_lrb; -+ - void satb_write_barrier_pre(MacroAssembler* masm, - Register obj, - Register pre_val, -@@ -57,17 +59,22 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { - - void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); - void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); -- void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators); -+ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr); -+ void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr); -+ -+ address generate_shenandoah_lrb(StubCodeGenerator* cgen); - - public: - -+ static address shenandoah_lrb(); -+ - void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); - - #ifdef COMPILER1 - void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); - void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); - void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); -- void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); -+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); - #endif - - virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -@@ -81,8 +88,10 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { - virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, - Register obj, Register tmp, Label& 
slowpath); - -- void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, -+ virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, - Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); -+ -+ virtual void barrier_stubs_init(); - }; - - #endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -index 6c855f23c2a..bab407a8b76 100644 ---- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -@@ -176,48 +176,6 @@ instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldva - ins_pipe(pipe_slow); - %} - --instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -- predicate(needs_acquiring_load_reserved(n)); -- match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); -- ins_cost(10 * DEFAULT_COST); -- -- effect(TEMP_DEF res, TEMP tmp, KILL cr); -- format %{ -- "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq_shenandoah" -- %} -- -- ins_encode %{ -- Register tmp = $tmp$$Register; -- __ mv(tmp, $oldval$$Register); -- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -- Assembler::aq /* acquire */, Assembler::rl /* release */, -- true /* is_cae */, $res$$Register); -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -- predicate(needs_acquiring_load_reserved(n)); -- match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); -- ins_cost(10 * DEFAULT_COST); -- -- effect(TEMP_DEF res, TEMP tmp, KILL cr); -- format %{ -- "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq_shenandoah" -- %} -- -- ins_encode %{ -- Register tmp = $tmp$$Register; -- __ mv(tmp, $oldval$$Register); -- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -- Assembler::aq /* acquire */, Assembler::rl /* release */, -- true /* is_cae */, $res$$Register); -- %} -- -- ins_pipe(pipe_slow); --%} -- - instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ - match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); - ins_cost(10 * DEFAULT_COST); -@@ -237,49 +195,3 @@ instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldva - - ins_pipe(pipe_slow); - %} -- --instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -- predicate(needs_acquiring_load_reserved(n)); -- match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); -- ins_cost(10 * DEFAULT_COST); -- -- effect(TEMP tmp, KILL cr); -- format %{ -- "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapNAcq_shenandoah" -- "mv $res, EQ\t# $res <-- (EQ ? 
1 : 0)" -- %} -- -- ins_encode %{ -- Register tmp = $tmp$$Register; -- __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -- // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop -- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -- Assembler::aq /* acquire */, Assembler::rl /* release */, -- false /* is_cae */, $res$$Register); -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -- predicate(needs_acquiring_load_reserved(n)); -- match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); -- ins_cost(10 * DEFAULT_COST); -- -- effect(TEMP tmp, KILL cr); -- format %{ -- "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapPAcq_shenandoah" -- "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" -- %} -- -- ins_encode %{ -- Register tmp = $tmp$$Register; -- __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -- // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop -- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -- Assembler::aq /* acquire */, Assembler::rl /* release */, -- false /* is_cae */, $res$$Register); -- %} -- -- ins_pipe(pipe_slow); --%} -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index e3f976faa0d..a6061de7a33 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -828,8 +828,10 @@ bool is_CAS(int opcode, bool maybe_volatile) - case Op_CompareAndSwapL: - case Op_CompareAndSwapP: - case Op_CompareAndSwapN: -+#if INCLUDE_SHENANDOAHGC - case Op_ShenandoahCompareAndSwapP: - case Op_ShenandoahCompareAndSwapN: -+#endif - case Op_CompareAndSwapB: - case Op_CompareAndSwapS: - case Op_GetAndSetI: -@@ -851,10 +853,6 @@ bool is_CAS(int opcode, bool maybe_volatile) - case Op_WeakCompareAndSwapL: - case Op_WeakCompareAndSwapP: - case Op_WeakCompareAndSwapN: -- case Op_ShenandoahWeakCompareAndSwapP: -- case Op_ShenandoahWeakCompareAndSwapN: -- case Op_ShenandoahCompareAndExchangeP: -- case Op_ShenandoahCompareAndExchangeN: - return maybe_volatile; - default: - return false; -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index a07dea35b73..5a87c687cf7 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -765,9 +765,18 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { - __ sd(ProfileInterpreter ? 
t0 : zr, Address(sp, 6 * wordSize)); - - // Get mirror and store it in the frame as GC root for this Method* -- __ load_mirror(t2, xmethod); -- __ sd(zr, Address(sp, 5 * wordSize)); -- __ sd(t2, Address(sp, 4 * wordSize)); -+#if INCLUDE_SHENANDOAHGC -+ if (UseShenandoahGC) { -+ __ load_mirror(x28, xmethod); -+ __ sd(zr, Address(sp, 5 * wordSize)); -+ __ sd(x28, Address(sp, 4 * wordSize)); -+ } else -+#endif -+ { -+ __ load_mirror(t2, xmethod); -+ __ sd(zr, Address(sp, 5 * wordSize)); -+ __ sd(t2, Address(sp, 4 * wordSize)); -+ } - - __ ld(xcpool, Address(xmethod, Method::const_offset())); - __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); - -From d8b14fd5e6455b47cfcb02d13c0c24c74e824570 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 14:42:07 +0800 -Subject: [PATCH 061/140] Revert JDK-8248404: AArch64: Remove uses of long and - unsigned long - ---- - src/hotspot/cpu/riscv/assembler_riscv.hpp | 19 +++++++++++++------ - .../cpu/riscv/macroAssembler_riscv.cpp | 6 ------ - .../cpu/riscv/macroAssembler_riscv.hpp | 13 ++++++++----- - 3 files changed, 21 insertions(+), 17 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp -index 51aa052a0c7..31aeeb9b425 100644 ---- a/src/hotspot/cpu/riscv/assembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -183,13 +183,20 @@ class Address { - : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { } - Address(Register r) - : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } -- -- template::value)> -- Address(Register r, T o) -- : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) {} -- -+ Address(Register r, int o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, long o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, long long o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, unsigned int o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, unsigned long o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, unsigned long long o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } - Address(Register r, ByteSize disp) -- : Address(r, in_bytes(disp)) {} -+ : Address(r, in_bytes(disp)) { } - Address(address target, RelocationHolder const& rspec) - : _base(noreg), - _index(noreg), -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 6e4d22db40f..b95f69cfcda 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1364,12 +1364,6 @@ void MacroAssembler::mv(Register Rd, Address dest) { - movptr(Rd, dest.target()); - } - --void MacroAssembler::mv(Register Rd, address addr) { -- // Here in case of use with relocation, use fix length instruciton -- // movptr instead of li -- movptr(Rd, addr); --} -- - void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { - if (src.is_register()) { - mv(Rd, src.as_register()); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index c1ffa120774..76b2716659b 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ 
b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -543,15 +543,18 @@ class MacroAssembler: public Assembler { - } - - // mv -- template::value)> -- inline void mv(Register Rd, T o) { -- li(Rd, (int64_t)o); -- } -+ void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } -+ -+ inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, unsigned int imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, unsigned long imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, unsigned long long imm64) { li(Rd, (int64_t)imm64); } - - inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } - - void mv(Register Rd, Address dest); -- void mv(Register Rd, address addr); - void mv(Register Rd, RegisterOrConstant src); - - // logic - -From 94c1c9c01e61d0cb7c32596ef19b347c32406546 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 16:54:36 +0800 -Subject: [PATCH 062/140] Revert JDK-8280503: Use allStatic.hpp instead of - allocation.hpp where possible - ---- - src/hotspot/cpu/riscv/bytes_riscv.hpp | 2 -- - src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 1 - - 2 files changed, 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp -index 23d982f9abd..f60e0e38ae8 100644 ---- a/src/hotspot/cpu/riscv/bytes_riscv.hpp -+++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp -@@ -27,8 +27,6 @@ - #ifndef CPU_RISCV_BYTES_RISCV_HPP - #define CPU_RISCV_BYTES_RISCV_HPP - --#include "memory/allStatic.hpp" -- - class Bytes: AllStatic { - public: - // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering -diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -index 83ffcc55d83..bc4e5758256 100644 ---- a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -+++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -@@ -27,7 +27,6 @@ - #define CPU_RISCV_JNITYPES_RISCV_HPP - - #include "jni.h" --#include "memory/allStatic.hpp" - #include "oops/oop.hpp" - - // This file holds platform-dependent routines used to write primitive jni - -From 49e6399009b51edafa6904164528e1d051aeae6c Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:07:31 +0800 -Subject: [PATCH 063/140] Revert JDK-8276453: Undefined behavior in C1 - LIR_OprDesc causes SEGV in fastdebug build - ---- - src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 4 ++-- - src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 4 ++-- - src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 1 + - 3 files changed, 5 insertions(+), 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -index af7bd067f33..6057d43296b 100644 ---- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -@@ -58,7 +58,7 @@ RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) - } - - RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) -- : _index(index), _array(), _throw_index_out_of_bounds_exception(true) { -+ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { - assert(info != NULL, "must have info"); - _info = new CodeEmitInfo(info); - } -@@ -83,7 +83,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) { - if (_throw_index_out_of_bounds_exception) { - stub_id = 
Runtime1::throw_index_exception_id; - } else { -- assert(_array != LIR_Opr::nullOpr(), "sanity"); -+ assert(_array != NULL, "sanity"); - __ mv(t1, _array->as_pointer_register()); - stub_id = Runtime1::throw_range_check_failed_id; - } -diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -index 172031941b2..1f8b2b55100 100644 ---- a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -@@ -156,8 +156,8 @@ LIR_Opr FrameMap::long11_opr; - LIR_Opr FrameMap::fpu10_float_opr; - LIR_Opr FrameMap::fpu10_double_opr; - --LIR_Opr FrameMap::_caller_save_cpu_regs[] = {}; --LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; -+LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; -+LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; - - //-------------------------------------------------------- - // FrameMap -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index c45a75b2301..227e7664225 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -206,6 +206,7 @@ LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { - break; - default: - ShouldNotReachHere(); -+ r = NULL; - } - return r; - } - -From b94bda9d1a2c12fa379f8fe813460c498344f543 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:19:19 +0800 -Subject: [PATCH 064/140] Revert JDK-8256205: Simplify compiler calling - convention handling - ---- - src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/riscv.ad | 25 +++++++++++++++++++ - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 11 ++------ - 4 files changed, 29 insertions(+), 11 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -index 6057d43296b..12980c12de6 100644 ---- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -@@ -290,7 +290,7 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) { - const int args_num = 5; - VMRegPair args[args_num]; - BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; -- SharedRuntime::java_calling_convention(signature, args, args_num); -+ SharedRuntime::java_calling_convention(signature, args, args_num, true); - - // push parameters - Register r[args_num]; -diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -index 1f8b2b55100..682ebe82627 100644 ---- a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -@@ -314,7 +314,7 @@ void FrameMap::initialize() { - - VMRegPair regs; - BasicType sig_bt = T_OBJECT; -- SharedRuntime::java_calling_convention(&sig_bt, ®s, 1); -+ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); - receiver_opr = as_oop_opr(regs.first()->as_Register()); - - for (i = 0; i < nof_caller_save_fpu_regs; i++) { -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index a6061de7a33..1667994699f 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2443,6 +2443,12 @@ frame %{ - // Stack alignment requirement - stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) - -+ // Number of stack slots between incoming argument block and the start of -+ // a new frame. 
The PROLOG must add this many slots to the stack. The -+ // EPILOG must remove this many slots. RISC-V needs two slots for -+ // return address and fp. -+ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); -+ - // Number of outgoing stack slots killed above the out_preserve_stack_slots - // for calls to C. Supports the var-args backing area for register parms. - varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); -@@ -2461,6 +2467,25 @@ frame %{ - Compile::current()->fixed_slots()), - stack_alignment_in_slots())); - -+ // Body of function which returns an integer array locating -+ // arguments either in registers or in stack slots. Passed an array -+ // of ideal registers called "sig" and a "length" count. Stack-slot -+ // offsets are based on outgoing arguments, i.e. a CALLER setting up -+ // arguments for a CALLEE. Incoming stack arguments are -+ // automatically biased by the preserve_stack_slots field above. -+ -+ calling_convention -+ %{ -+ // No difference between ingoing/outgoing just pass false -+ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); -+ %} -+ -+ c_calling_convention -+ %{ -+ // This is obviously always outgoing -+ (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length); -+ %} -+ - // Location of compiled Java return values. Same as C for now. - return_value - %{ -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index eaefcc2b595..411bddd2ace 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -233,7 +233,8 @@ static int reg2offset_out(VMReg r) { - - int SharedRuntime::java_calling_convention(const BasicType *sig_bt, - VMRegPair *regs, -- int total_args_passed) { -+ int total_args_passed, -+ int is_outgoing) { - // Create the mapping between argument positions and - // registers. - static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { -@@ -2155,14 +2156,6 @@ void SharedRuntime::generate_deopt_blob() { - _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); - } - --// Number of stack slots between incoming argument block and the start of --// a new frame. The PROLOG must add this many slots to the stack. The --// EPILOG must remove this many slots. --// RISCV needs two words for RA (return address) and FP (frame pointer). 
--uint SharedRuntime::in_preserve_stack_slots() { -- return 2 * VMRegImpl::slots_per_word; --} -- - uint SharedRuntime::out_preserve_stack_slots() { - return 0; - } - -From 3fc948472c4a0918b967646b45c8886103b839d2 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:27:57 +0800 -Subject: [PATCH 065/140] Revert JDK-8183574: Unify the is_power_of_2 functions - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 4 ++-- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 1 - - src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 3 +-- - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1 - - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1 - - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 1 - - src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 1 - - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 1 - - src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp | 1 - - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 - - 10 files changed, 3 insertions(+), 12 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -index 4c1c13dc290..65d0eda62ef 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -@@ -190,7 +190,7 @@ void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr rig - code == lir_add ? __ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c); - break; - case lir_div: -- assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); -+ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); - if (c == 1) { - // move lreg_lo to dreg if divisor is 1 - __ mv(dreg, lreg_lo); -@@ -208,7 +208,7 @@ void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr rig - } - break; - case lir_rem: -- assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); -+ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); - if (c == 1) { - // move 0 to dreg if divisor is 1 - __ mv(dreg, zr); -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 90c4af5d3b0..9de89a3b026 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -41,7 +41,6 @@ - #include "oops/objArrayKlass.hpp" - #include "runtime/frame.inline.hpp" - #include "runtime/sharedRuntime.hpp" --#include "utilities/powerOfTwo.hpp" - #include "vmreg_riscv.inline.hpp" - - #ifndef PRODUCT -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index 227e7664225..a9345158749 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -38,7 +38,6 @@ - #include "ci/ciTypeArrayKlass.hpp" - #include "runtime/sharedRuntime.hpp" - #include "runtime/stubRoutines.hpp" --#include "utilities/powerOfTwo.hpp" - #include "vmreg_riscv.inline.hpp" - - #ifdef ASSERT -@@ -383,7 +382,7 @@ void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { - // no need to do div-by-zero check if the divisor is a non-zero constant - if (c != 0) { need_zero_check = false; } - // do not load right if the divisor is a power-of-2 constant -- if (c > 0 && is_power_of_2(c)) { -+ if (c > 0 && is_power_of_2_long(c)) { - right.dont_load_item(); - } else { - right.load_item(); -diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp 
b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -index 1f45fba9de0..fc88d5c180e 100644 ---- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -46,7 +46,6 @@ - #include "runtime/stubRoutines.hpp" - #include "runtime/vframe.hpp" - #include "runtime/vframeArray.hpp" --#include "utilities/powerOfTwo.hpp" - #include "vmreg_riscv.inline.hpp" - - -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index f0c249f0d26..2fc0b00e2cb 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -44,7 +44,6 @@ - #include "runtime/safepointMechanism.hpp" - #include "runtime/sharedRuntime.hpp" - #include "runtime/thread.inline.hpp" --#include "utilities/powerOfTwo.hpp" - - void InterpreterMacroAssembler::narrow(Register result) { - // Get method->_constMethod->_result_type -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index b95f69cfcda..41a415ef2cf 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -47,7 +47,6 @@ - #include "runtime/sharedRuntime.hpp" - #include "runtime/stubRoutines.hpp" - #include "runtime/thread.hpp" --#include "utilities/powerOfTwo.hpp" - #ifdef COMPILER2 - #include "opto/compile.hpp" - #include "opto/node.hpp" -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 76b2716659b..dd39f67d507 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -30,7 +30,6 @@ - #include "asm/assembler.hpp" - #include "metaprogramming/enableIf.hpp" - #include "oops/compressedOops.hpp" --#include "utilities/powerOfTwo.hpp" - - // MacroAssembler extends Assembler by frequently used macros. 
- // -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -index 8392b768847..0c5b0e001ee 100644 ---- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -45,7 +45,6 @@ - #include "runtime/stubRoutines.hpp" - #include "runtime/thread.inline.hpp" - #include "utilities/align.hpp" --#include "utilities/powerOfTwo.hpp" - #ifdef COMPILER2 - #include "opto/runtime.hpp" - #endif -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index 5a87c687cf7..a10677bf650 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -51,7 +51,6 @@ - #include "runtime/timer.hpp" - #include "runtime/vframeArray.hpp" - #include "utilities/debug.hpp" --#include "utilities/powerOfTwo.hpp" - #include - - #ifndef PRODUCT -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index 158294f7436..2a92fb9dd49 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -44,7 +44,6 @@ - #include "runtime/sharedRuntime.hpp" - #include "runtime/stubRoutines.hpp" - #include "runtime/synchronizer.hpp" --#include "utilities/powerOfTwo.hpp" - - #define __ _masm-> - - -From 31b18aa6a29b83e2cae7ea76c5d4759b2596eca0 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:34:39 +0800 -Subject: [PATCH 066/140] Revert JDK-8276976: Rename LIR_OprDesc to LIR_Opr - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/c1_LIR_riscv.cpp | 14 +++++++------- - 2 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 9de89a3b026..70ee6295bfb 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -1261,7 +1261,7 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - assert(op->addr()->is_address(), "what else?"); - LIR_Address* addr_ptr = op->addr()->as_address_ptr(); - assert(addr_ptr->disp() == 0, "need 0 disp"); -- assert(addr_ptr->index() == LIR_Opr::illegalOpr(), "need 0 index"); -+ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); - addr = as_reg(addr_ptr->base()); - } - Register newval = as_reg(op->new_value()); -diff --git a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp -index 5f1c394ab3d..0317ed9003e 100644 ---- a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp -@@ -27,22 +27,22 @@ - #include "asm/register.hpp" - #include "c1/c1_LIR.hpp" - --FloatRegister LIR_Opr::as_float_reg() const { -+FloatRegister LIR_OprDesc::as_float_reg() const { - return as_FloatRegister(fpu_regnr()); - } - --FloatRegister LIR_Opr::as_double_reg() const { -+FloatRegister LIR_OprDesc::as_double_reg() const { - return as_FloatRegister(fpu_regnrLo()); - } - - // Reg2 unused. 
- LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { - assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); -- return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | -- (reg1 << LIR_Opr::reg2_shift) | -- LIR_Opr::double_type | -- LIR_Opr::fpu_register | -- LIR_Opr::double_size); -+ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | -+ (reg1 << LIR_OprDesc::reg2_shift) | -+ LIR_OprDesc::double_type | -+ LIR_OprDesc::fpu_register | -+ LIR_OprDesc::double_size); - } - - #ifndef PRODUCT - -From 2e64fa47eddc271d32b136ace4f062cfb9648b25 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:39:16 +0800 -Subject: [PATCH 067/140] Revert JDK-8269672: C1: Remove unaligned move on all - architectures - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 8 +++++--- - .../cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 2 +- - 2 files changed, 6 insertions(+), 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 70ee6295bfb..e29c0df5f8b 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -673,7 +673,7 @@ void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool po - } - } - --void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide) { -+void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { - LIR_Address* to_addr = dest->as_address_ptr(); - // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src - Register compressed_src = t1; -@@ -795,7 +795,7 @@ void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { - reg2stack(temp, dest, dest->type(), false); - } - --void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide) { -+void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { - assert(src->is_address(), "should not call otherwise"); - assert(dest->is_register(), "should not call otherwise"); - -@@ -910,11 +910,13 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L - Label done; - move_op(opr2, result, type, lir_patch_none, NULL, - false, // pop_fpu_stack -+ false, // unaligned - false); // wide - __ j(done); - __ bind(label); - move_op(opr1, result, type, lir_patch_none, NULL, - false, // pop_fpu_stack -+ false, // unaligned - false); // wide - __ bind(done); - } -@@ -1866,7 +1868,7 @@ void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* arg - - void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { - if (dest->is_address() || src->is_address()) { -- move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /* wide */ false); -+ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /*unaligned*/ false, /* wide */ false); - } else { - ShouldNotReachHere(); - } -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -index d724876ec3a..bc847388f68 100644 ---- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -+++ 
b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -340,7 +340,7 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier - Register pre_val_reg = stub->pre_val()->as_register(); - - if (stub->do_load()) { -- ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); - } - __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); - ce->store_parameter(stub->pre_val()->as_register(), 0); - -From 5f15abe61c700cbf59805530c52e8e558354d552 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:54:05 +0800 -Subject: [PATCH 068/140] Revert JDK-8264805: Remove the experimental - Ahead-of-Time Compiler - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp | 1 + - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 4 ++-- - 2 files changed, 3 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp -index 051328c3a8a..5c81f1c704c 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp -@@ -73,6 +73,7 @@ friend class ArrayCopyStub; - // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) - _call_stub_size = 14 * NativeInstruction::instruction_size + - (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), -+ _call_aot_stub_size = 0, - // See emit_exception_handler for detail - // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) - _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller -diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -index 1cfc92b28fa..a29e5be9dbb 100644 ---- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -86,7 +86,7 @@ int CompiledStaticCall::reloc_to_interp_stub() { - } - - void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { -- address stub = find_stub(); -+ address stub = find_stub(false /* is_aot */); - guarantee(stub != NULL, "stub not found"); - - if (TraceICs) { -@@ -138,7 +138,7 @@ void CompiledDirectStaticCall::verify() { - _call->verify_alignment(); - - // Verify stub. -- address stub = find_stub(); -+ address stub = find_stub(false /* is_aot */); - assert(stub != NULL, "no stub found for static call"); - // Creation also verifies the object. 
- NativeMovConstReg* method_holder - -From 4cfd20c7d163188a1a4e63ffaa19708e15be9d96 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:59:45 +0800 -Subject: [PATCH 069/140] Revert JDK-8277417: C1 LIR instruction for load-klass - ---- - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 30 ++++++++----------- - 1 file changed, 12 insertions(+), 18 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index e29c0df5f8b..49653d04d81 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -840,7 +840,14 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch - __ ld(dest->as_register(), as_Address(from_addr)); - break; - case T_ADDRESS: -- __ ld(dest->as_register(), as_Address(from_addr)); -+ // FIXME: OMG this is a horrible kludge. Any offset from an -+ // address that matches klass_offset_in_bytes() will be loaded -+ // as a word, not a long. -+ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { -+ __ lwu(dest->as_register(), as_Address(from_addr)); -+ } else { -+ __ ld(dest->as_register(), as_Address(from_addr)); -+ } - break; - case T_INT: - __ lw(dest->as_register(), as_Address(from_addr)); -@@ -869,6 +876,10 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch - __ decode_heap_oop(dest->as_register()); - } - __ verify_oop(dest->as_register()); -+ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { -+ if (UseCompressedClassPointers) { -+ __ decode_klass_not_null(dest->as_register()); -+ } - } - } - -@@ -1531,23 +1542,6 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { - __ bind(*op->stub()->continuation()); - } - --void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { -- Register obj = op->obj()->as_pointer_register(); -- Register result = op->result_opr()->as_pointer_register(); -- -- CodeEmitInfo* info = op->info(); -- if (info != NULL) { -- add_debug_info_for_null_check_here(info); -- } -- -- if (UseCompressedClassPointers) { -- __ lwu(result, Address(obj, oopDesc::klass_offset_in_bytes())); -- __ decode_klass_not_null(result); -- } else { -- __ ld(result, Address(obj, oopDesc::klass_offset_in_bytes())); -- } --} -- - void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { - ciMethod* method = op->profiled_method(); - int bci = op->profiled_bci(); - -From eb4de6fc8f9b6192d16343382ebbe4035ce71702 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:09:31 +0800 -Subject: [PATCH 070/140] Revert JDK-8245957: Remove unused LIR_OpBranch::type - after SPARC port removal - ---- - src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index a9345158749..2aba4f4974f 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -393,7 +393,7 @@ void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { - if (need_zero_check) { - CodeEmitInfo* info = state_for(x); - __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); -- __ branch(lir_cond_equal, new DivByZeroStub(info)); -+ __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info)); - } - - rlock_result(x); -@@ -467,7 +467,7 @@ void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { - if 
(need_zero_check) { - CodeEmitInfo* info = state_for(x); - __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); -- __ branch(lir_cond_equal, new DivByZeroStub(info)); -+ __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info)); - } - - LIR_Opr ill = LIR_OprFact::illegalOpr; -@@ -1055,9 +1055,9 @@ void LIRGenerator::do_If(If* x) { - profile_branch(x, cond); - move_to_phi(x->state()); - if (x->x()->type()->is_float_kind()) { -- __ branch(lir_cond(cond), x->tsux(), x->usux()); -+ __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); - } else { -- __ branch(lir_cond(cond), x->tsux()); -+ __ branch(lir_cond(cond), right->type(), x->tsux()); - } - assert(x->default_sux() == x->fsux(), "wrong destination above"); - __ jump(x->default_sux()); - -From d34f25c618982d3ac79e6ab2a47b3a199434d01b Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:14:10 +0800 -Subject: [PATCH 071/140] Revert JDK-8266950: Remove vestigial support for - non-strict floating-point execution - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 4 ++++ - src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 7 ++++++- - 2 files changed, 10 insertions(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -index 65d0eda62ef..2a99d49c94b 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -@@ -238,7 +238,9 @@ void LIR_Assembler::arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr rig - switch (code) { - case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; - case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; -+ case lir_mul_strictfp: // fall through - case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; -+ case lir_div_strictfp: // fall through - case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; - default: - ShouldNotReachHere(); -@@ -251,7 +253,9 @@ void LIR_Assembler::arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr rig - switch (code) { - case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; - case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; -+ case lir_mul_strictfp: // fall through - case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; -+ case lir_div_strictfp: // fall through - case lir_div: __ fdiv_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; - default: - ShouldNotReachHere(); -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index 2aba4f4974f..21ae066e9ab 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -360,7 +360,12 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { - right.load_item(); - - LIR_Opr reg = rlock(x); -- arithmetic_op_fpu(x->op(), reg, left.result(), right.result()); -+ LIR_Opr tmp = LIR_OprFact::illegalOpr; -+ if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) { -+ tmp = new_register(T_DOUBLE); -+ } -+ -+ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); - - set_result(x, round_item(reg)); - } - -From 
02c0a84d52417d4aeddbdd10c07df446ee45c5de Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:19:51 +0800 -Subject: [PATCH 072/140] Revert JDK-8276217: Harmonize StrictMath intrinsics - handling - ---- - src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 6 ++---- - 1 file changed, 2 insertions(+), 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index 21ae066e9ab..f9242251491 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -651,16 +651,14 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { - do_LibmIntrinsic(x); - break; - case vmIntrinsics::_dabs: // fall through -- case vmIntrinsics::_dsqrt: // fall through -- case vmIntrinsics::_dsqrt_strict: { -+ case vmIntrinsics::_dsqrt: { - assert(x->number_of_arguments() == 1, "wrong type"); - LIRItem value(x->argument_at(0), this); - value.load_item(); - LIR_Opr dst = rlock_result(x); - - switch (x->id()) { -- case vmIntrinsics::_dsqrt: // fall through -- case vmIntrinsics::_dsqrt_strict: { -+ case vmIntrinsics::_dsqrt: { - __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); - break; - } - -From 8dbace163d42cbb41ff49463b34f8971437fe82f Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:35:08 +0800 -Subject: [PATCH 073/140] Revert JDK-8276209: Some call sites doesn't pass the - parameter 'size' to SharedRuntime::dtrace_object_alloc(_base) - ---- - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -index fc88d5c180e..329df2e1ca7 100644 ---- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -1186,7 +1186,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); - save_live_registers(sasm); - -- __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), c_rarg0); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), c_rarg0); - - restore_live_registers(sasm); - } -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index 2a92fb9dd49..ddc9498dddc 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -3577,7 +3577,7 @@ void TemplateTable::_new() { - SkipIfEqual skip(_masm, &DTraceAllocProbes, false); - // Trigger dtrace event for fastpath - __ push(atos); // save the return value -- __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), x10); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), x10); - __ pop(atos); // restore the return value - } - __ j(done); - -From 8930b6049a5b6e31ec9409c167b0e58d24cf6821 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:38:51 +0800 -Subject: [PATCH 074/140] Revert JDK-8229838: Rename markOop files to markWord - ---- - src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp | 1 - - src/hotspot/cpu/riscv/frame_riscv.cpp | 1 - - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1 - - 3 files changed, 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp 
b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index e486f41948e..44ceccd8bd1 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -33,7 +33,6 @@ - #include "gc/shared/collectedHeap.hpp" - #include "interpreter/interpreter.hpp" - #include "oops/arrayOop.hpp" --#include "oops/markWord.hpp" - #include "runtime/basicLock.hpp" - #include "runtime/biasedLocking.hpp" - #include "runtime/os.hpp" -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 13c482b610a..050595389e9 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -29,7 +29,6 @@ - #include "interpreter/interpreter.hpp" - #include "memory/resourceArea.hpp" - #include "memory/universe.hpp" --#include "oops/markWord.hpp" - #include "oops/method.hpp" - #include "oops/oop.inline.hpp" - #include "prims/methodHandles.hpp" -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 2fc0b00e2cb..006fe49b155 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -33,7 +33,6 @@ - #include "interpreter/interpreterRuntime.hpp" - #include "logging/log.hpp" - #include "oops/arrayOop.hpp" --#include "oops/markWord.hpp" - #include "oops/method.hpp" - #include "oops/methodData.hpp" - #include "prims/jvmtiExport.hpp" - -From f11c5a2beca94c8248c30899fef90947d478e10c Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:42:33 +0800 -Subject: [PATCH 075/140] Revert JDK-8235673: [C1, C2] Split inlining control - flags - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index fe46f7b21c8..fd25f8f9afd 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -42,6 +42,7 @@ define_pd_global(bool, TieredCompilation, false); - define_pd_global(intx, CompileThreshold, 1500 ); - - define_pd_global(intx, OnStackReplacePercentage, 933 ); -+define_pd_global(intx, FreqInlineSize, 325 ); - define_pd_global(intx, NewSizeThreadIncrease, 4*K ); - define_pd_global(intx, InitialCodeCacheSize, 160*K); - define_pd_global(intx, ReservedCodeCacheSize, 32*M ); - -From 6908dc58f2c66ca6a5adf4444a7ec2a91a80b9c8 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:45:00 +0800 -Subject: [PATCH 076/140] Revert JDK-8262074: Consolidate the default value of - MetaspaceSize - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 3 +++ - 2 files changed, 4 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index fd25f8f9afd..1c55a23eecf 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -53,6 +53,7 @@ define_pd_global(bool, ProfileInterpreter, false); - define_pd_global(intx, CodeCacheExpansionSize, 32*K ); - define_pd_global(uintx, CodeCacheMinBlockLength, 1); - define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); -+define_pd_global(uintx, MetaspaceSize, 12*M ); - define_pd_global(bool, NeverActAsServerClassMachine, true ); - define_pd_global(uint64_t, MaxRAM, 1ULL*G); - define_pd_global(bool, CICompileOSR, true ); -diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp 
b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -index 53a41665f4b..d9e5fcc1bb0 100644 ---- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -75,6 +75,9 @@ define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); - define_pd_global(uintx, CodeCacheMinBlockLength, 6); - define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); - -+// Heap related flags -+define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); -+ - // Ergonomics related flags - define_pd_global(bool, NeverActAsServerClassMachine, false); - - -From a3e991b37781d90c822471b54ace915622bee0da Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:48:15 +0800 -Subject: [PATCH 077/140] Revert JDK-8246023: Obsolete LIRFillDelaySlot - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index 1c55a23eecf..bd8d039de03 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -60,6 +60,7 @@ define_pd_global(bool, CICompileOSR, true ); - #endif // !COMPILER2 - define_pd_global(bool, UseTypeProfile, false); - -+define_pd_global(bool, LIRFillDelaySlots, false); - define_pd_global(bool, OptimizeSinglePrecision, true ); - define_pd_global(bool, CSEArrayLength, false); - define_pd_global(bool, TwoOperandLIRForm, false); - -From 9f6082ae9810e6a26c6803cb37cce62297d15a74 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:50:27 +0800 -Subject: [PATCH 078/140] Revert JDK-8136414: Large performance penalty - declaring a method strictfp on strict-only platforms - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index bd8d039de03..16a87b7aced 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -59,6 +59,7 @@ define_pd_global(uint64_t, MaxRAM, 1ULL*G); - define_pd_global(bool, CICompileOSR, true ); - #endif // !COMPILER2 - define_pd_global(bool, UseTypeProfile, false); -+define_pd_global(bool, RoundFPResults, true ); - - define_pd_global(bool, LIRFillDelaySlots, false); - define_pd_global(bool, OptimizeSinglePrecision, true ); - -From fbf03fc61be068f7f7c8ca1ab3854cc05519c5a3 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:58:36 +0800 -Subject: [PATCH 079/140] Revert JDK-8251462: Simplify compilation policy - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 4 +- - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 +- - src/hotspot/cpu/riscv/globals_riscv.hpp | 2 +- - .../templateInterpreterGenerator_riscv.cpp | 114 +++++++++--- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 176 ++++++++++++------ - 5 files changed, 210 insertions(+), 88 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index 16a87b7aced..8f2f4e0e81d 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -32,7 +32,7 @@ - // Sets the default values for platform dependent flags used by the client compiler. 
- // (see c1_globals.hpp) - --#ifndef COMPILER2 -+#ifndef TIERED - define_pd_global(bool, BackgroundCompilation, true ); - define_pd_global(bool, InlineIntrinsics, true ); - define_pd_global(bool, PreferInterpreterNativeStubs, false); -@@ -57,7 +57,7 @@ define_pd_global(uintx, MetaspaceSize, 12*M ); - define_pd_global(bool, NeverActAsServerClassMachine, true ); - define_pd_global(uint64_t, MaxRAM, 1ULL*G); - define_pd_global(bool, CICompileOSR, true ); --#endif // !COMPILER2 -+#endif // !TIERED - define_pd_global(bool, UseTypeProfile, false); - define_pd_global(bool, RoundFPResults, true ); - -diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -index d9e5fcc1bb0..6c301cdae04 100644 ---- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -39,7 +39,7 @@ define_pd_global(bool, PreferInterpreterNativeStubs, false); - define_pd_global(bool, ProfileTraps, true); - define_pd_global(bool, UseOnStackReplacement, true); - define_pd_global(bool, ProfileInterpreter, true); --define_pd_global(bool, TieredCompilation, COMPILER1_PRESENT(true) NOT_COMPILER1(false)); -+define_pd_global(bool, TieredCompilation, trueInTiered); - define_pd_global(intx, CompileThreshold, 10000); - - define_pd_global(intx, OnStackReplacePercentage, 140); -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index 50bbb6a77b8..b78f258a764 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -36,7 +36,7 @@ define_pd_global(bool, ImplicitNullChecks, true); // Generate code for im - define_pd_global(bool, TrapBasedNullChecks, false); - define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast - --define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. -+define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. - define_pd_global(intx, CodeEntryAlignment, 64); - define_pd_global(intx, OptoLoopAlignment, 16); - -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index a10677bf650..8aea4eca048 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -556,31 +556,81 @@ address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, - // - // xmethod: method - // --void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) { -+void TemplateInterpreterGenerator::generate_counter_incr( -+ Label* overflow, -+ Label* profile_method, -+ Label* profile_method_continue) { - Label done; - // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. -- int increment = InvocationCounter::count_increment; -- Label no_mdo; -- if (ProfileInterpreter) { -- // Are we profiling? 
-- __ ld(x10, Address(xmethod, Method::method_data_offset())); -- __ beqz(x10, no_mdo); -- // Increment counter in the MDO -- const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + -- in_bytes(InvocationCounter::counter_offset())); -- const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); -- __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); -- __ j(done); -+ if (TieredCompilation) { -+ int increment = InvocationCounter::count_increment; -+ Label no_mdo; -+ if (ProfileInterpreter) { -+ // Are we profiling? -+ __ ld(x10, Address(xmethod, Method::method_data_offset())); -+ __ beqz(x10, no_mdo); -+ // Increment counter in the MDO -+ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); -+ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); -+ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); -+ __ j(done); -+ } -+ __ bind(no_mdo); -+ // Increment counter in MethodCounters -+ const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); -+ __ get_method_counters(xmethod, t1, done); -+ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); -+ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); -+ __ bind(done); -+ } else { // not TieredCompilation -+ const Address backedge_counter(t1, -+ MethodCounters::backedge_counter_offset() + -+ InvocationCounter::counter_offset()); -+ const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); -+ -+ __ get_method_counters(xmethod, t1, done); -+ -+ if (ProfileInterpreter) { // %%% Merge this into MethodData* -+ __ lwu(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); -+ __ addw(x11, x11, 1); -+ __ sw(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); -+ } -+ // Update standard invocation counters -+ __ lwu(x11, invocation_counter); -+ __ lwu(x10, backedge_counter); -+ -+ __ addw(x11, x11, InvocationCounter::count_increment); -+ __ andi(x10, x10, InvocationCounter::count_mask_value); -+ -+ __ sw(x11, invocation_counter); -+ __ addw(x10, x10, x11); // add both counters -+ -+ // profile_method is non-null only for interpreted method so -+ // profile_method != NULL == !native_call -+ -+ if (ProfileInterpreter && profile_method != NULL) { -+ // Test to see if we should create a method data oop -+ __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); -+ __ blt(x10, t1, *profile_method_continue); -+ -+ // if no method data exists, go to profile_method -+ __ test_method_data_pointer(t1, *profile_method); -+ } -+ -+ { -+ __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); -+ __ bltu(x10, t1, done); -+ __ j(*overflow); -+ } -+ __ bind(done); - } -- __ bind(no_mdo); -- // Increment counter in MethodCounters -- const Address invocation_counter(t1, -- MethodCounters::invocation_counter_offset() + -- InvocationCounter::counter_offset()); -- __ get_method_counters(xmethod, t1, done); -- const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); -- __ 
increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); -- __ bind(done); - } - - void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { -@@ -977,7 +1027,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { - // increment invocation count & check for overflow - Label invocation_counter_overflow; - if (inc_counter) { -- generate_counter_incr(&invocation_counter_overflow); -+ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); - } - - Label continue_after_compile; -@@ -1389,8 +1439,15 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { - - // increment invocation count & check for overflow - Label invocation_counter_overflow; -+ Label profile_method; -+ Label profile_method_continue; - if (inc_counter) { -- generate_counter_incr(&invocation_counter_overflow); -+ generate_counter_incr(&invocation_counter_overflow, -+ &profile_method, -+ &profile_method_continue); -+ if (ProfileInterpreter) { -+ __ bind(profile_method_continue); -+ } - } - - Label continue_after_compile; -@@ -1427,6 +1484,15 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { - - // invocation counter overflow - if (inc_counter) { -+ if (ProfileInterpreter) { -+ // We have decided to profile this method in the interpreter -+ __ bind(profile_method); -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); -+ __ set_method_data_pointer_for_bcp(); -+ // don't think we need this -+ __ get_method(x11); -+ __ j(profile_method_continue); -+ } - // Handle overflow of counter and compile method - __ bind(invocation_counter_overflow); - generate_counter_overflow(continue_after_compile); -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index ddc9498dddc..bb20f228447 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -1745,6 +1745,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) - assert(UseLoopCounter || !UseOnStackReplacement, - "on-stack-replacement requires loop counters"); - Label backedge_counter_overflow; -+ Label profile_method; - Label dispatch; - if (UseLoopCounter) { - // increment backedge counter for backward branches -@@ -1769,31 +1770,75 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) - __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory - __ bind(has_counters); - -- Label no_mdo; -- int increment = InvocationCounter::count_increment; -- if (ProfileInterpreter) { -- // Are we profiling? -- __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); -- __ beqz(x11, no_mdo); -- // Increment the MDO backedge counter -- const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + -- in_bytes(InvocationCounter::counter_offset())); -- const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); -- __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, -- x10, t0, false, -+ if (TieredCompilation) { -+ Label no_mdo; -+ int increment = InvocationCounter::count_increment; -+ if (ProfileInterpreter) { -+ // Are we profiling? 
-+ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ __ beqz(x11, no_mdo); -+ // Increment the MDO backedge counter -+ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); -+ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); -+ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, -+ x10, t0, false, -+ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); -+ __ j(dispatch); -+ } -+ __ bind(no_mdo); -+ // Increment backedge counter in MethodCounters* -+ __ ld(t0, Address(xmethod, Method::method_counters_offset())); -+ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); -+ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, -+ x10, t1, false, - UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); -- __ j(dispatch); -+ } else { // not TieredCompilation -+ // increment counter -+ __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(x10, Address(t1, be_offset)); // load backedge counter -+ __ addw(t0, x10, InvocationCounter::count_increment); // increment counter -+ __ sw(t0, Address(t1, be_offset)); // store counter -+ -+ __ lwu(x10, Address(t1, inv_offset)); // load invocation counter -+ __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits -+ __ addw(x10, x10, t0); // add both counters -+ -+ if (ProfileInterpreter) { -+ // Test to see if we should create a method data oop -+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); -+ __ blt(x10, t0, dispatch); -+ -+ // if no method data exists, go to profile method -+ __ test_method_data_pointer(x10, profile_method); -+ -+ if (UseOnStackReplacement) { -+ // check for overflow against x11 which is the MDO taken count -+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); -+ __ bltu(x11, t0, dispatch); // Intel == Assembler::below, lo:unsigned lower -+ -+ // When ProfileInterpreter is on, the backedge_count comes -+ // from the MethodData*, which value does not get reset on -+ // the call to frequency_counter_overflow(). To avoid -+ // excessive calls to the overflow routine while the method is -+ // being compiled, add a second test to make sure the overflow -+ // function is called only once every overflow_frequency. -+ const int overflow_frequency = 1024; -+ __ andi(x11, x11, overflow_frequency - 1); -+ __ beqz(x11, backedge_counter_overflow); -+ -+ } -+ } else { -+ if (UseOnStackReplacement) { -+ // check for overflow against x10, which is the sum of the -+ // counters -+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); -+ __ bgeu(x10, t0, backedge_counter_overflow); // Intel == Assembler::aboveEqual -+ } -+ } - } -- __ bind(no_mdo); -- // Increment backedge counter in MethodCounters* -- __ ld(t0, Address(xmethod, Method::method_counters_offset())); -- const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); -- __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, -- x10, t1, false, -- UseOnStackReplacement ? 
&backedge_counter_overflow : &dispatch); - __ bind(dispatch); - } -- - // Pre-load the next target bytecode into t0 - __ load_unsigned_byte(t0, Address(xbcp, 0)); - -@@ -1802,52 +1847,63 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) - // xbcp: target bcp - __ dispatch_only(vtos, /*generate_poll*/true); - -- if (UseLoopCounter && UseOnStackReplacement) { -- // invocation counter overflow -- __ bind(backedge_counter_overflow); -- __ neg(x12, x12); -- __ add(x12, x12, xbcp); // branch xbcp -- // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) -- __ call_VM(noreg, -- CAST_FROM_FN_PTR(address, -- InterpreterRuntime::frequency_counter_overflow), -- x12); -- __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode -- -- // x10: osr nmethod (osr ok) or NULL (osr not possible) -- // w11: target bytecode -- // x12: temporary -- __ beqz(x10, dispatch); // test result -- no osr if null -- // nmethod may have been invalidated (VM may block upon call_VM return) -- __ lbu(x12, Address(x10, nmethod::state_offset())); -- if (nmethod::in_use != 0) { -- __ sub(x12, x12, nmethod::in_use); -+ if (UseLoopCounter) { -+ if (ProfileInterpreter && !TieredCompilation) { -+ // Out-of-line code to allocate method data oop. -+ __ bind(profile_method); -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); -+ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode -+ __ set_method_data_pointer_for_bcp(); -+ __ j(dispatch); - } -- __ bnez(x12, dispatch); - -- // We have the address of an on stack replacement routine in x10 -- // We need to prepare to execute the OSR method. First we must -- // migrate the locals and monitors off of the stack. -+ if (UseOnStackReplacement) { -+ // invocation counter overflow -+ __ bind(backedge_counter_overflow); -+ __ neg(x12, x12); -+ __ add(x12, x12, xbcp); // branch xbcp -+ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::frequency_counter_overflow), -+ x12); -+ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode -+ -+ // x10: osr nmethod (osr ok) or NULL (osr not possible) -+ // w11: target bytecode -+ // x12: temporary -+ __ beqz(x10, dispatch); // test result -- no osr if null -+ // nmethod may have been invalidated (VM may block upon call_VM return) -+ __ lbu(x12, Address(x10, nmethod::state_offset())); -+ if (nmethod::in_use != 0) { -+ __ sub(x12, x12, nmethod::in_use); -+ } -+ __ bnez(x12, dispatch); -+ -+ // We have the address of an on stack replacement routine in x10 -+ // We need to prepare to execute the OSR method. First we must -+ // migrate the locals and monitors off of the stack. 
- -- __ mv(x9, x10); // save the nmethod -+ __ mv(x9, x10); // save the nmethod - -- call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); -+ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); - -- // x10 is OSR buffer, move it to expected parameter location -- __ mv(j_rarg0, x10); -+ // x10 is OSR buffer, move it to expected parameter location -+ __ mv(j_rarg0, x10); - -- // remove activation -- // get sender esp -- __ ld(esp, -- Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); -- // remove frame anchor -- __ leave(); -- // Ensure compiled code always sees stack at proper alignment -- __ andi(sp, esp, -16); -+ // remove activation -+ // get sender esp -+ __ ld(esp, -+ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); -+ // remove frame anchor -+ __ leave(); -+ // Ensure compiled code always sees stack at proper alignment -+ __ andi(sp, esp, -16); - -- // and begin the OSR nmethod -- __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); -- __ jr(t0); -+ // and begin the OSR nmethod -+ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); -+ __ jr(t0); -+ } - } - } - - -From b1f3fd0510681324d70028443a3532d6084be504 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 27 Apr 2023 11:37:05 +0800 -Subject: [PATCH 080/140] Revert JDK-8250902: Implement MD5 Intrinsics on x86 - ---- - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 5 ---- - ...nericTestCaseForUnsupportedRISCV64CPU.java | 30 +++++++++---------- - 2 files changed, 15 insertions(+), 20 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -index c0491d23fa6..d4b79162d84 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -97,11 +97,6 @@ void VM_Version::initialize() { - FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); - } - -- if (UseMD5Intrinsics) { -- warning("MD5 intrinsics are not available on this CPU."); -- FLAG_SET_DEFAULT(UseMD5Intrinsics, false); -- } -- - if (UseRVV) { - if (!(_features & CPU_V)) { - warning("RVV is not supported on this CPU"); -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -index 2ecfec07a4c..8566d57c391 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -@@ -24,7 +24,7 @@ - - package compiler.intrinsics.sha.cli.testcases; - --import compiler.intrinsics.sha.cli.DigestOptionsBase; -+import compiler.intrinsics.sha.cli.SHAOptionsBase; - import jdk.test.lib.process.ExitCode; - import jdk.test.lib.Platform; - import jdk.test.lib.cli.CommandLineOptionTest; -@@ -36,7 +36,7 @@ - * which don't support instruction required by the tested option. 
- */ - public class GenericTestCaseForUnsupportedRISCV64CPU extends -- DigestOptionsBase.TestCase { -+ SHAOptionsBase.TestCase { - - final private boolean checkUseSHA; - -@@ -46,7 +46,7 @@ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { - - public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { - super(optionName, new AndPredicate(Platform::isRISCV64, -- new NotPredicate(DigestOptionsBase.getPredicateForOption( -+ new NotPredicate(SHAOptionsBase.getPredicateForOption( - optionName)))); - - this.checkUseSHA = checkUseSHA; -@@ -58,27 +58,27 @@ protected void verifyWarnings() throws Throwable { - + "option '-XX:-%s' without any warnings", optionName); - //Verify that option could be disabled without any warnings. - CommandLineOptionTest.verifySameJVMStartup(null, new String[] { -- DigestOptionsBase.getWarningForUnsupportedCPU(optionName) -+ SHAOptionsBase.getWarningForUnsupportedCPU(optionName) - }, shouldPassMessage, shouldPassMessage, ExitCode.OK, -- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, - CommandLineOptionTest.prepareBooleanFlag(optionName, false)); - - if (checkUseSHA) { - shouldPassMessage = String.format("If JVM is started with '-XX:-" - + "%s' '-XX:+%s', output should contain warning.", -- DigestOptionsBase.USE_SHA_OPTION, optionName); -+ SHAOptionsBase.USE_SHA_OPTION, optionName); - - // Verify that when the tested option is enabled, then - // a warning will occur in VM output if UseSHA is disabled. -- if (!optionName.equals(DigestOptionsBase.USE_SHA_OPTION)) { -+ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { - CommandLineOptionTest.verifySameJVMStartup( -- new String[] { DigestOptionsBase.getWarningForUnsupportedCPU(optionName) }, -+ new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, - null, - shouldPassMessage, - shouldPassMessage, - ExitCode.OK, -- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -- CommandLineOptionTest.prepareBooleanFlag(DigestOptionsBase.USE_SHA_OPTION, false), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), - CommandLineOptionTest.prepareBooleanFlag(optionName, true)); - } - } -@@ -90,7 +90,7 @@ protected void verifyOptionValues() throws Throwable { - CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", - String.format("Option '%s' should be disabled by default", - optionName), -- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); - - if (checkUseSHA) { - // Verify that option is disabled even if it was explicitly enabled -@@ -98,7 +98,7 @@ protected void verifyOptionValues() throws Throwable { - CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", - String.format("Option '%s' should be off on unsupported " - + "RISCV64CPU even if set to true directly", optionName), -- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, - CommandLineOptionTest.prepareBooleanFlag(optionName, true)); - - // Verify that option is disabled when +UseSHA was passed to JVM. 
-@@ -106,10 +106,10 @@ protected void verifyOptionValues() throws Throwable { - String.format("Option '%s' should be off on unsupported " - + "RISCV64CPU even if %s flag set to JVM", - optionName, CommandLineOptionTest.prepareBooleanFlag( -- DigestOptionsBase.USE_SHA_OPTION, true)), -- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ SHAOptionsBase.USE_SHA_OPTION, true)), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, - CommandLineOptionTest.prepareBooleanFlag( -- DigestOptionsBase.USE_SHA_OPTION, true)); -+ SHAOptionsBase.USE_SHA_OPTION, true)); - } - } - } - -From b5e96cb7663b2def3a064b9aede7209fb0c5eeda Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 27 Apr 2023 15:41:48 +0800 -Subject: [PATCH 081/140] Revert JDK-8253555: Make ByteSize and WordSize typed - scoped enums - ---- - src/hotspot/cpu/riscv/assembler_riscv.hpp | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp -index 31aeeb9b425..9959ac1d02c 100644 ---- a/src/hotspot/cpu/riscv/assembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -195,8 +195,10 @@ class Address { - : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } - Address(Register r, unsigned long long o) - : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+#ifdef ASSERT - Address(Register r, ByteSize disp) -- : Address(r, in_bytes(disp)) { } -+ : _base(r), _index(noreg), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(0) { } -+#endif - Address(address target, RelocationHolder const& rspec) - : _base(noreg), - _index(noreg), - -From 592afab705a4d4c8b2773a0808e47efc2a14517d Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 15:18:12 +0800 -Subject: [PATCH 082/140] Revert JDK-8253457: Remove unimplemented register - stack functions - ---- - .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -index 61e2cf85b63..313a7b932c3 100644 ---- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -@@ -34,15 +34,31 @@ - frame pd_last_frame(); - - public: -+ -+ void set_base_of_stack_pointer(intptr_t* base_sp) { -+ } -+ - static ByteSize last_Java_fp_offset() { - return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); - } - -+ intptr_t* base_of_stack_pointer() { -+ return NULL; -+ } -+ void record_base_of_stack_pointer() { -+ } -+ - bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, - bool isInJava); - - bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); - private: - bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); -+public: -+ // These routines are only used on cpu architectures that -+ // have separate register stacks (Itanium). 
-+ static bool register_stack_overflow() { return false; } -+ static void enable_register_stack_guard() {} -+ static void disable_register_stack_guard() {} - - #endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP - -From 28238cf776bd25c9805d9dd686c08fe8d3a1500b Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 15:22:30 +0800 -Subject: [PATCH 083/140] Revert JDK-8253539: Remove unused JavaThread - functions for set_last_Java_fp/pc - ---- - src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp | 3 +++ - src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp | 3 +++ - 2 files changed, 6 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -index 9a6084afa1d..5a0c9b812fc 100644 ---- a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -+++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -@@ -83,4 +83,7 @@ - - intptr_t* last_Java_fp(void) { return _last_Java_fp; } - -+ // Assert (last_Java_sp == NULL || fp == NULL) -+ void set_last_Java_fp(intptr_t* fp) { OrderAccess::release(); _last_Java_fp = fp; } -+ - #endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -index 313a7b932c3..4b91fa855ae 100644 ---- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -@@ -34,6 +34,9 @@ - frame pd_last_frame(); - - public: -+ // Mutators are highly dangerous.... -+ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } -+ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } - - void set_base_of_stack_pointer(intptr_t* base_sp) { - } - -From f9322bb6235b603eac825c6e6751093ada1e6cfe Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 15:45:56 +0800 -Subject: [PATCH 084/140] Revert JDK-8269853: Prefetch::read should accept - pointer to const - ---- - src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp -index a6432c84ec7..2bd48e09c34 100644 ---- a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp -+++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp -@@ -29,7 +29,7 @@ - #include "runtime/prefetch.hpp" - - --inline void Prefetch::read (const void *loc, intx interval) { -+inline void Prefetch::read (void *loc, intx interval) { - } - - inline void Prefetch::write(void *loc, intx interval) { - -From aa6f7320d8d849b8e47b6e77a20257e3d99fd14f Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 16:14:55 +0800 -Subject: [PATCH 085/140] Revert: JDK-8254231: Implementation of Foreign Linker - API (Incubator) JDK-8264774: Implementation of Foreign Function and Memory - API (Incubator) - ---- - .../cpu/riscv/foreign_globals_riscv.cpp | 44 ------------------- - .../cpu/riscv/foreign_globals_riscv.hpp | 32 -------------- - src/hotspot/cpu/riscv/frame_riscv.cpp | 15 ------- - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 12 +---- - src/hotspot/cpu/riscv/riscv.ad | 5 --- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 10 +---- - .../riscv/universalNativeInvoker_riscv.cpp | 33 -------------- - .../cpu/riscv/universalUpcallHandle_riscv.cpp | 42 ------------------ - src/hotspot/cpu/riscv/vmreg_riscv.cpp | 5 --- - 9 files changed, 2 insertions(+), 196 deletions(-) - delete mode 
100644 src/hotspot/cpu/riscv/foreign_globals_riscv.cpp - delete mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.hpp - delete mode 100644 src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp - delete mode 100644 src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp - -diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp -deleted file mode 100644 -index 5c700be9c91..00000000000 ---- a/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp -+++ /dev/null -@@ -1,44 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#include "precompiled.hpp" --#include "prims/foreign_globals.hpp" --#include "utilities/debug.hpp" -- --// Stubbed out, implement later --const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { -- Unimplemented(); -- return {}; --} -- --const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { -- Unimplemented(); -- return {}; --} -- --const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { -- ShouldNotCallThis(); -- return {}; --} -diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp -deleted file mode 100644 -index 3ac89752c27..00000000000 ---- a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp -+++ /dev/null -@@ -1,32 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#ifndef CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP --#define CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP -- --class ABIDescriptor {}; --class BufferLayout {}; -- --#endif // CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 050595389e9..40ec584b994 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -361,21 +361,6 @@ frame frame::sender_for_entry_frame(RegisterMap* map) const { - return fr; - } - --OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { -- ShouldNotCallThis(); -- return nullptr; --} -- --bool frame::optimized_entry_frame_is_first() const { -- ShouldNotCallThis(); -- return false; --} -- --frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { -- ShouldNotCallThis(); -- return {}; --} -- - //------------------------------------------------------------------------------ - // frame::verify_deopt_original_pc - // -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -index 1f7c0c87c21..3bf5cfb16c3 100644 ---- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -181,13 +181,6 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* - return NULL; - } - -- // No need in interpreter entry for linkToNative for now. -- // Interpreter calls compiled entry through i2c. -- if (iid == vmIntrinsics::_linkToNative) { -- __ ebreak(); -- return NULL; -- } -- - // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) - // xmethod: Method* - // x13: argument locator (parameter slot count, added to sp) -@@ -280,10 +273,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, - assert_different_registers(temp1, temp2, temp3, receiver_reg); - assert_different_registers(temp1, temp2, temp3, member_reg); - -- if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { -- if (iid == vmIntrinsics::_linkToNative) { -- assert(for_compiler_entry, "only compiler entry is supported"); -- } -+ if (iid == vmIntrinsics::_invokeBasic) { - // indirect through MH.form.vmentry.vmtarget - jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); - } else { -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 1667994699f..7ec76e72ff0 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -945,11 +945,6 @@ int MachCallRuntimeNode::ret_addr_offset() { - } - } - --int MachCallNativeNode::ret_addr_offset() { -- Unimplemented(); -- return -1; --} -- - // - // Compute padding required for nodes which need alignment - // -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 411bddd2ace..897dafcc99c 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1037,7 +1037,7 @@ static void gen_special_dispatch(MacroAssembler* masm, - member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument - member_reg = x9; // known to be free at this point - has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); -- } else if (iid == 
vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { -+ } else if (iid == vmIntrinsics::_invokeBasic) { - has_receiver = true; - } else { - fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); -@@ -2566,14 +2566,6 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha - } - - #ifdef COMPILER2 --RuntimeStub* SharedRuntime::make_native_invoker(address call_target, -- int shadow_space_bytes, -- const GrowableArray& input_registers, -- const GrowableArray& output_registers) { -- Unimplemented(); -- return nullptr; --} -- - //------------------------------generate_exception_blob--------------------------- - // creates exception blob at the end - // Using exception blob, this code is jumped from a compiled method. -diff --git a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp -deleted file mode 100644 -index 4f50adb05c3..00000000000 ---- a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp -+++ /dev/null -@@ -1,33 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#include "precompiled.hpp" --#include "prims/universalNativeInvoker.hpp" --#include "utilities/debug.hpp" -- --address ProgrammableInvoker::generate_adapter(jobject jabi, jobject jlayout) { -- Unimplemented(); -- return nullptr; --} -diff --git a/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp -deleted file mode 100644 -index ce70da72f2e..00000000000 ---- a/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp -+++ /dev/null -@@ -1,42 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). 
-- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#include "precompiled.hpp" --#include "prims/universalUpcallHandler.hpp" --#include "utilities/debug.hpp" -- --address ProgrammableUpcallHandler::generate_upcall_stub(jobject jrec, jobject jabi, jobject jlayout) { -- Unimplemented(); -- return nullptr; --} -- --address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) { -- ShouldNotCallThis(); -- return nullptr; --} -- --bool ProgrammableUpcallHandler::supports_optimized_upcalls() { -- return false; --} -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -index 1f6eff96cba..5d1187c2a27 100644 ---- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -@@ -49,8 +49,3 @@ void VMRegImpl::set_regName() { - regName[i] = "NON-GPR-FPR"; - } - } -- --VMReg VMRegImpl::vmStorageToVMReg(int type, int index) { -- Unimplemented(); -- return VMRegImpl::Bad(); --} - -From a5889735a97f3712bb649c454dee192d75457f96 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 17:35:20 +0800 -Subject: [PATCH 086/140] Revert JDK-8256254: Convert vmIntrinsics::ID to enum - class - ---- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2 +- - 3 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 006fe49b155..1133e80a210 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -1841,7 +1841,7 @@ void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, - beq(t0, tmp, do_profile); - get_method(tmp); - lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); -- li(t1, static_cast(vmIntrinsics::_compiledLambdaForm)); -+ li(t1, vmIntrinsics::_compiledLambdaForm); - bne(t0, t1, profile_continue); - bind(do_profile); - } -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -index 3bf5cfb16c3..4442b5991b1 100644 ---- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -411,7 +411,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, - } - - default: -- fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); -+ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); - break; - } - -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 897dafcc99c..5b934b04e8e 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1040,7 +1040,7 @@ static void gen_special_dispatch(MacroAssembler* masm, - } else if (iid == vmIntrinsics::_invokeBasic) { - has_receiver = true; - } else { -- fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); -+ fatal("unexpected intrinsic id %d", iid); - } - - if (member_reg != noreg) { - -From 
245d01e2cae27e41b875450f5f92751e4f36a095 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 20:27:58 +0800 -Subject: [PATCH 087/140] Revert JDK-8216557: Aarch64: Add support for - Concurrent Class Unloading - ---- - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 4 - - .../gc/shared/barrierSetAssembler_riscv.cpp | 71 -------- - .../gc/shared/barrierSetAssembler_riscv.hpp | 3 - - .../gc/shared/barrierSetNMethod_riscv.cpp | 171 ------------------ - .../cpu/riscv/macroAssembler_riscv.cpp | 35 +--- - .../cpu/riscv/macroAssembler_riscv.hpp | 2 - - src/hotspot/cpu/riscv/relocInfo_riscv.cpp | 1 - - src/hotspot/cpu/riscv/riscv.ad | 16 -- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 7 - - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 49 ----- - src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 1 - - src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 6 - - 12 files changed, 5 insertions(+), 361 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp - -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index 44ceccd8bd1..a6d1b1470f9 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -322,10 +322,6 @@ void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { - // Note that we do this before creating a frame. - generate_stack_overflow_check(bang_size_in_bytes); - MacroAssembler::build_frame(framesize); -- -- // Insert nmethod entry barrier into frame. -- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -- bs->nmethod_entry_barrier(this); - } - - void C1_MacroAssembler::remove_frame(int framesize) { -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp -index 3c115a2ea02..2b556b95d71 100644 ---- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp -@@ -27,7 +27,6 @@ - #include "classfile/classLoaderData.hpp" - #include "gc/shared/barrierSet.hpp" - #include "gc/shared/barrierSetAssembler.hpp" --#include "gc/shared/barrierSetNMethod.hpp" - #include "gc/shared/collectedHeap.hpp" - #include "interpreter/interp_masm.hpp" - #include "memory/universe.hpp" -@@ -230,73 +229,3 @@ void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, - } - __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); - } -- --void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { -- BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); -- -- if (bs_nm == NULL) { -- return; -- } -- -- // RISCV atomic operations require that the memory address be naturally aligned. -- __ align(4); -- -- Label skip, guard; -- Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset())); -- -- __ lwu(t0, guard); -- -- // Subsequent loads of oops must occur after load of guard value. -- // BarrierSetNMethod::disarm sets guard with release semantics. -- __ membar(MacroAssembler::LoadLoad); -- __ lwu(t1, thread_disarmed_addr); -- __ beq(t0, t1, skip); -- -- int32_t offset = 0; -- __ movptr_with_offset(t0, StubRoutines::riscv::method_entry_barrier(), offset); -- __ jalr(ra, t0, offset); -- __ j(skip); -- -- __ bind(guard); -- -- assert(__ offset() % 4 == 0, "bad alignment"); -- __ emit_int32(0); // nmethod guard value. Skipped over in common case. 
-- -- __ bind(skip); --} -- --void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { -- BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod(); -- if (bs == NULL) { -- return; -- } -- -- Label bad_call; -- __ beqz(xmethod, bad_call); -- -- // Pointer chase to the method holder to find out if the method is concurrently unloading. -- Label method_live; -- __ load_method_holder_cld(t0, xmethod); -- -- // Is it a strong CLD? -- __ lwu(t1, Address(t0, ClassLoaderData::keep_alive_offset())); -- __ bnez(t1, method_live); -- -- // Is it a weak but alive CLD? -- __ push_reg(RegSet::of(x28, x29), sp); -- -- __ ld(x28, Address(t0, ClassLoaderData::holder_offset())); -- -- // Uses x28 & x29, so we must pass new temporaries. -- __ resolve_weak_handle(x28, x29); -- __ mv(t0, x28); -- -- __ pop_reg(RegSet::of(x28, x29), sp); -- -- __ bnez(t0, method_live); -- -- __ bind(bad_call); -- -- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -- __ bind(method_live); --} -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp -index b85f7f5582b..984d94f4c3d 100644 ---- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp -@@ -28,7 +28,6 @@ - - #include "asm/macroAssembler.hpp" - #include "gc/shared/barrierSet.hpp" --#include "gc/shared/barrierSetNMethod.hpp" - #include "memory/allocation.hpp" - #include "oops/access.hpp" - -@@ -71,8 +70,6 @@ class BarrierSetAssembler: public CHeapObj { - ); - virtual void barrier_stubs_init() {} - -- virtual void nmethod_entry_barrier(MacroAssembler* masm); -- virtual void c2i_entry_barrier(MacroAssembler* masm); - virtual ~BarrierSetAssembler() {} - }; - -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp -deleted file mode 100644 -index ae7ee4c5a44..00000000000 ---- a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp -+++ /dev/null -@@ -1,171 +0,0 @@ --/* -- * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#include "precompiled.hpp" --#include "code/codeCache.hpp" --#include "code/nativeInst.hpp" --#include "gc/shared/barrierSetNMethod.hpp" --#include "logging/log.hpp" --#include "memory/resourceArea.hpp" --#include "runtime/sharedRuntime.hpp" --#include "runtime/registerMap.hpp" --#include "runtime/thread.hpp" --#include "utilities/align.hpp" --#include "utilities/debug.hpp" -- --class NativeNMethodBarrier: public NativeInstruction { -- address instruction_address() const { return addr_at(0); } -- -- int *guard_addr() { -- /* auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j */ -- return reinterpret_cast(instruction_address() + 12 * 4); -- } -- --public: -- int get_value() { -- return Atomic::load_acquire(guard_addr()); -- } -- -- void set_value(int value) { -- Atomic::release_store(guard_addr(), value); -- } -- -- void verify() const; --}; -- --// Store the instruction bitmask, bits and name for checking the barrier. --struct CheckInsn { -- uint32_t mask; -- uint32_t bits; -- const char *name; --}; -- --static const struct CheckInsn barrierInsn[] = { -- { 0x00000fff, 0x00000297, "auipc t0, 0 "}, -- { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) "}, -- { 0xffffffff, 0x0aa0000f, "fence ir, ir "}, -- { 0x000fffff, 0x000be303, "lwu t1, 112(xthread)"}, -- { 0x01fff07f, 0x00628063, "beq t0, t1, skip "}, -- { 0x00000fff, 0x000002b7, "lui t0, imm0 "}, -- { 0x000fffff, 0x00028293, "addi t0, t0, imm1 "}, -- { 0xffffffff, 0x00b29293, "slli t0, t0, 11 "}, -- { 0x000fffff, 0x00028293, "addi t0, t0, imm2 "}, -- { 0xffffffff, 0x00529293, "slli t0, t0, 5 "}, -- { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) "}, -- { 0x00000fff, 0x0000006f, "j skip "} -- /* guard: */ -- /* 32bit nmethod guard value */ -- /* skip: */ --}; -- --// The encodings must match the instructions emitted by --// BarrierSetAssembler::nmethod_entry_barrier. The matching ignores the specific --// register numbers and immediate values in the encoding. --void NativeNMethodBarrier::verify() const { -- intptr_t addr = (intptr_t) instruction_address(); -- for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) { -- uint32_t inst = *((uint32_t*) addr); -- if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) { -- tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst); -- fatal("not an %s instruction.", barrierInsn[i].name); -- } -- addr += 4; -- } --} -- -- --/* We're called from an nmethod when we need to deoptimize it. We do -- this by throwing away the nmethod's frame and jumping to the -- ic_miss stub. This looks like there has been an IC miss at the -- entry of the nmethod, so we resolve the call, which will fall back -- to the interpreter if the nmethod has been unloaded. 
*/ --void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { -- -- typedef struct { -- intptr_t *sp; intptr_t *fp; address ra; address pc; -- } frame_pointers_t; -- -- frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5); -- -- JavaThread *thread = JavaThread::current(); -- RegisterMap reg_map(thread, false); -- frame frame = thread->last_frame(); -- -- assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be"); -- assert(frame.cb() == nm, "must be"); -- frame = frame.sender(®_map); -- -- LogTarget(Trace, nmethod, barrier) out; -- if (out.is_enabled()) { -- ResourceMark mark; -- log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p", -- nm->method()->name_and_sig_as_C_string(), -- nm, *(address *) return_address_ptr, nm->is_osr_method(), thread, -- thread->name(), frame.sp(), nm->verified_entry_point()); -- } -- -- new_frame->sp = frame.sp(); -- new_frame->fp = frame.fp(); -- new_frame->ra = frame.pc(); -- new_frame->pc = SharedRuntime::get_handle_wrong_method_stub(); --} -- --// This is the offset of the entry barrier from where the frame is completed. --// If any code changes between the end of the verified entry where the entry --// barrier resides, and the completion of the frame, then --// NativeNMethodCmpBarrier::verify() will immediately complain when it does --// not find the expected native instruction at this offset, which needs updating. --// Note that this offset is invariant of PreserveFramePointer. -- --// see BarrierSetAssembler::nmethod_entry_barrier --// auipc + lwu + fence + lwu + beq + movptr_with_offset(5 instructions) + jalr + j + int32 --static const int entry_barrier_offset = -4 * 13; -- --static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) { -- address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset; -- NativeNMethodBarrier* barrier = reinterpret_cast(barrier_address); -- debug_only(barrier->verify()); -- return barrier; --} -- --void BarrierSetNMethod::disarm(nmethod* nm) { -- if (!supports_entry_barrier(nm)) { -- return; -- } -- -- // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier. -- NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); -- -- barrier->set_value(disarmed_value()); --} -- --bool BarrierSetNMethod::is_armed(nmethod* nm) { -- if (!supports_entry_barrier(nm)) { -- return false; -- } -- -- NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); -- return barrier->get_value() != disarmed_value(); --} -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 41a415ef2cf..a75bd9dfa89 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1638,10 +1638,10 @@ void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, - beq(trial_klass, tmp, L); - } - --// Move an oop into a register. immediate is true if we want --// immediate instructions and nmethod entry barriers are not enabled. --// i.e. we are not going to patch this instruction while the code is being --// executed by another thread. -+// Move an oop into a register. immediate is true if we want -+// immediate instructions, i.e. we are not going to patch this -+// instruction while the code is being executed by another thread. In -+// that case we can use move immediates rather than the constant pool. 
- void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { - int oop_index; - if (obj == NULL) { -@@ -1656,11 +1656,7 @@ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { - oop_index = oop_recorder()->find_index(obj); - } - RelocationHolder rspec = oop_Relocation::spec(oop_index); -- -- // nmethod entry barrier necessitate using the constant pool. They have to be -- // ordered with respected to oop access. -- // Using immediate literals would necessitate fence.i. -- if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) { -+ if (!immediate) { - address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address - ld_constant(dst, Address(dummy, rspec)); - } else -@@ -1738,22 +1734,6 @@ void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { - access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); - } - --// ((WeakHandle)result).resolve() --void MacroAssembler::resolve_weak_handle(Register result, Register tmp) { -- assert_different_registers(result, tmp); -- Label resolved; -- -- // A null weak handle resolves to null. -- beqz(result, resolved); -- -- // Only 64 bit platforms support GCs that require a tmp register -- // Only IN_HEAP loads require a thread_tmp register -- // WeakHandle::resolve is an indirection like jweak. -- access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, -- result, Address(result), tmp, noreg /* tmp_thread */); -- bind(resolved); --} -- - void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, - Register dst, Address src, - Register tmp1, Register thread_tmp) { -@@ -3195,11 +3175,6 @@ void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { - beq(src1, t0, equal); - } - --void MacroAssembler::load_method_holder_cld(Register result, Register method) { -- load_method_holder(result, method); -- ld(result, Address(result, InstanceKlass::class_loader_data_offset())); --} -- - void MacroAssembler::load_method_holder(Register holder, Register method) { - ld(holder, Address(method, Method::const_offset())); // ConstMethod* - ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index dd39f67d507..b16fe904888 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -207,7 +207,6 @@ class MacroAssembler: public Assembler { - virtual void check_and_handle_earlyret(Register java_thread); - virtual void check_and_handle_popframe(Register java_thread); - -- void resolve_weak_handle(Register result, Register tmp); - void resolve_oop_handle(Register result, Register tmp = x15); - void resolve_jobject(Register value, Register thread, Register tmp); - -@@ -673,7 +672,6 @@ class MacroAssembler: public Assembler { - void cmpptr(Register src1, Address src2, Label& equal); - - void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); -- void load_method_holder_cld(Register result, Register method); - void load_method_holder(Register holder, Register method); - - void compute_index(Register str1, Register trailing_zeros, Register match_mask, -diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -index 228a64eae2c..047ea2276ca 100644 ---- a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -@@ -41,7 +41,6 @@ 
void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { - switch (type()) { - case relocInfo::oop_type: { - oop_Relocation *reloc = (oop_Relocation *)this; -- // in movoop when BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate - if (NativeInstruction::is_load_pc_relative_at(addr())) { - address constptr = (address)code()->oop_addr_at(reloc->oop_index()); - bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 7ec76e72ff0..0a1838695e1 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1068,17 +1068,6 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - st->print("sd ra, [sp, #%d]\n\t", - wordSize); - if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } - st->print("sub sp, sp, #%d\n\t", framesize); -- -- if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) { -- st->print("ld t0, [guard]\n\t"); -- st->print("membar LoadLoad\n\t"); -- st->print("ld t1, [xthread, #thread_disarmed_offset]\n\t"); -- st->print("beq t0, t1, skip\n\t"); -- st->print("jalr #nmethod_entry_barrier_stub\n\t"); -- st->print("j skip\n\t"); -- st->print("guard: int\n\t"); -- st->print("skip:\n\t"); -- } - } - #endif - -@@ -1114,11 +1103,6 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - - __ build_frame(framesize); - -- if (C->stub_function() == NULL) { -- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -- bs->nmethod_entry_barrier(&_masm); -- } -- - if (VerifyStackAtCalls) { - Unimplemented(); - } -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 5b934b04e8e..326ba62fcb0 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -642,9 +642,6 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm - c2i_no_clinit_check_entry = __ pc(); - } - -- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -- bs->c2i_entry_barrier(masm); -- - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); - - __ flush(); -@@ -1290,10 +1287,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - // -2 because return address is already present and so is saved fp - __ sub(sp, sp, stack_size - 2 * wordSize); - -- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -- assert_cond(bs != NULL); -- bs->nmethod_entry_barrier(masm); -- - // Frame is now completed as far as size and linkage. 
- int frame_complete = ((intptr_t)__ pc()) - start; - -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -index 0c5b0e001ee..74c38c3d044 100644 ---- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -2352,50 +2352,6 @@ class StubGenerator: public StubCodeGenerator { - return entry; - } - -- address generate_method_entry_barrier() { -- __ align(CodeEntryAlignment); -- StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier"); -- -- Label deoptimize_label; -- -- address start = __ pc(); -- -- __ set_last_Java_frame(sp, fp, ra, t0); -- -- __ enter(); -- __ add(t1, sp, wordSize); -- -- __ sub(sp, sp, 4 * wordSize); -- -- __ push_call_clobbered_registers(); -- -- __ mv(c_rarg0, t1); -- __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetNMethod::nmethod_stub_entry_barrier), 1); -- -- __ reset_last_Java_frame(true); -- -- __ mv(t0, x10); -- -- __ pop_call_clobbered_registers(); -- -- __ bnez(t0, deoptimize_label); -- -- __ leave(); -- __ ret(); -- -- __ BIND(deoptimize_label); -- -- __ ld(t0, Address(sp, 0)); -- __ ld(fp, Address(sp, wordSize)); -- __ ld(ra, Address(sp, wordSize * 2)); -- __ ld(t1, Address(sp, wordSize * 3)); -- -- __ mv(sp, t0); -- __ jr(t1); -- -- return start; -- } -- - // x10 = result - // x11 = str1 - // x12 = cnt1 -@@ -3703,11 +3659,6 @@ class StubGenerator: public StubCodeGenerator { - - generate_string_indexof_stubs(); - -- BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); -- if (bs_nm != NULL) { -- StubRoutines::riscv::_method_entry_barrier = generate_method_entry_barrier(); -- } -- - StubRoutines::riscv::set_completed(); - } - -diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -index 395a2d338e4..9202d9ec4b0 100644 ---- a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -@@ -53,6 +53,5 @@ address StubRoutines::riscv::_string_indexof_linear_ll = NULL; - address StubRoutines::riscv::_string_indexof_linear_uu = NULL; - address StubRoutines::riscv::_string_indexof_linear_ul = NULL; - address StubRoutines::riscv::_large_byte_array_inflate = NULL; --address StubRoutines::riscv::_method_entry_barrier = NULL; - - bool StubRoutines::riscv::_completed = false; -diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -index 51f07819c33..0c9445e18a7 100644 ---- a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -@@ -67,8 +67,6 @@ class riscv { - static address _string_indexof_linear_ul; - static address _large_byte_array_inflate; - -- static address _method_entry_barrier; -- - static bool _completed; - - public: -@@ -145,10 +143,6 @@ class riscv { - return _large_byte_array_inflate; - } - -- static address method_entry_barrier() { -- return _method_entry_barrier; -- } -- - static bool complete() { - return _completed; - } - -From aee31440dde84c54449b5c0dbdfb43b4d3826f5a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 17:59:40 +0800 -Subject: [PATCH 088/140] Revert JDK-8223173: Implement fast class - initialization checks on AARCH64 && JDK-8227260: JNI upcalls should bypass - class initialization barrier in c2i adapter - ---- - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 12 ------- - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 12 +++---- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 12 ------- - 
src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 2 -- - .../cpu/riscv/macroAssembler_riscv.cpp | 36 ------------------- - .../cpu/riscv/macroAssembler_riscv.hpp | 3 -- - src/hotspot/cpu/riscv/riscv.ad | 11 ------ - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 30 +--------------- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 17 +++------ - 9 files changed, 11 insertions(+), 124 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 49653d04d81..1e482d7cc2b 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -90,18 +90,6 @@ static void select_different_registers(Register preserve, - - bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } - --void LIR_Assembler::clinit_barrier(ciMethod* method) { -- assert(VM_Version::supports_fast_class_init_checks(), "sanity"); -- assert(!method->holder()->is_not_initialized(), "initialization should have been started"); -- -- Label L_skip_barrier; -- -- __ mov_metadata(t1, method->holder()->constant_encoding()); -- __ clinit_barrier(t1, t0, &L_skip_barrier /* L_fast_path */); -- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -- __ bind(L_skip_barrier); --} -- - LIR_Opr LIR_Assembler::receiverOpr() { - return FrameMap::receiver_opr; - } -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index a6d1b1470f9..99d981f97f4 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -317,6 +317,12 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache, L - } - - void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { -+ // If we have to make this method not-entrant we'll overwrite its -+ // first instruction with a jump. For this action to be legal we -+ // must ensure that this first instruction is a J, JAL or NOP. -+ // Make it a NOP. -+ nop(); -+ - assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); - // Make sure there is enough stack space for this method's activation. - // Note that we do this before creating a frame. -@@ -330,12 +336,6 @@ void C1_MacroAssembler::remove_frame(int framesize) { - - - void C1_MacroAssembler::verified_entry() { -- // If we have to make this method not-entrant we'll overwrite its -- // first instruction with a jump. For this action to be legal we -- // must ensure that this first instruction is a J, JAL or NOP. -- // Make it a NOP. -- -- nop(); - } - - void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 1133e80a210..b50be7e726c 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -295,18 +295,6 @@ void InterpreterMacroAssembler::load_resolved_klass_at_offset( - ld(klass, Address(klass, Array::base_offset_in_bytes())); - } - --void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, -- Register method, -- Register cache) { -- const int method_offset = in_bytes( -- ConstantPoolCache::base_offset() + -- ((byte_no == TemplateTable::f2_byte) -- ? 
ConstantPoolCacheEntry::f2_offset() -- : ConstantPoolCacheEntry::f1_offset())); -- -- ld(method, Address(cache, method_offset)); // get f1 Method* --} -- - // Generate a subtype check: branch to ok_is_subtype if sub_klass is a - // subtype of super_klass. - // -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -index 4d8cb086f82..4126e8ee70f 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -@@ -122,8 +122,6 @@ class InterpreterMacroAssembler: public MacroAssembler { - // Load cpool->resolved_klass_at(index). - void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); - -- void load_resolved_method_at_index(int byte_no, Register method, Register cache); -- - void pop_ptr(Register r = x10); - void pop_i(Register r = x10); - void pop_l(Register r = x10); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index a75bd9dfa89..304b6f2b06c 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -372,36 +372,6 @@ void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thr - sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); - } - --void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { -- assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); -- assert_different_registers(klass, xthread, tmp); -- -- Label L_fallthrough, L_tmp; -- if (L_fast_path == NULL) { -- L_fast_path = &L_fallthrough; -- } else if (L_slow_path == NULL) { -- L_slow_path = &L_fallthrough; -- } -- -- // Fast path check: class is fully initialized -- lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); -- sub(tmp, tmp, InstanceKlass::fully_initialized); -- beqz(tmp, *L_fast_path); -- -- // Fast path check: current thread is initializer thread -- ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); -- -- if (L_slow_path == &L_fallthrough) { -- beq(xthread, tmp, *L_fast_path); -- bind(*L_slow_path); -- } else if (L_fast_path == &L_fallthrough) { -- bne(xthread, tmp, *L_slow_path); -- bind(*L_fast_path); -- } else { -- Unimplemented(); -- } --} -- - void MacroAssembler::verify_oop(Register reg, const char* s) { - if (!VerifyOops) { return; } - -@@ -3175,12 +3145,6 @@ void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { - beq(src1, t0, equal); - } - --void MacroAssembler::load_method_holder(Register holder, Register method) { -- ld(holder, Address(method, Method::const_offset())); // ConstMethod* -- ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* -- ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* --} -- - // string indexof - // compute index by trailing zeros - void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index b16fe904888..c6b71bdbc3c 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -671,9 +671,6 @@ class MacroAssembler: public Assembler { - - void cmpptr(Register src1, Address src2, Label& equal); - -- void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); -- void 
load_method_holder(Register holder, Register method); -- - void compute_index(Register str1, Register trailing_zeros, Register match_mask, - Register result, Register char_tmp, Register tmp, - bool haystack_isL); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 0a1838695e1..13546ab328b 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1085,17 +1085,6 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - - assert_cond(C != NULL); - -- if (C->clinit_barrier_on_entry()) { -- assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); -- -- Label L_skip_barrier; -- -- __ mov_metadata(t1, C->method()->holder()->constant_encoding()); -- __ clinit_barrier(t1, t0, &L_skip_barrier); -- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -- __ bind(L_skip_barrier); -- } -- - int bangsize = C->output()->bang_size_in_bytes(); - if (C->output()->need_stack_bang(bangsize)) { - __ generate_stack_overflow_check(bangsize); -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 326ba62fcb0..ae414224c5b 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -623,29 +623,10 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm - - address c2i_entry = __ pc(); - -- // Class initialization barrier for static methods -- address c2i_no_clinit_check_entry = NULL; -- if (VM_Version::supports_fast_class_init_checks()) { -- Label L_skip_barrier; -- -- { // Bypass the barrier for non-static methods -- __ lwu(t0, Address(xmethod, Method::access_flags_offset())); -- __ andi(t1, t0, JVM_ACC_STATIC); -- __ beqz(t1, L_skip_barrier); // non-static -- } -- -- __ load_method_holder(t1, xmethod); -- __ clinit_barrier(t1, t0, &L_skip_barrier); -- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -- -- __ bind(L_skip_barrier); -- c2i_no_clinit_check_entry = __ pc(); -- } -- - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); - - __ flush(); -- return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); -+ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); - } - - int SharedRuntime::c_calling_convention(const BasicType *sig_bt, -@@ -1270,15 +1251,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - // first instruction with a jump. 
- __ nop(); - -- if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { -- Label L_skip_barrier; -- __ mov_metadata(t1, method->method_holder()); // InstanceKlass* -- __ clinit_barrier(t1, t0, &L_skip_barrier); -- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -- -- __ bind(L_skip_barrier); -- } -- - // Generate stack overflow check - __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size())); - -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index bb20f228447..1f4409a9c9a 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -2307,7 +2307,7 @@ void TemplateTable::resolve_cache_and_index(int byte_no, - const Register temp = x9; - assert_different_registers(Rcache, index, temp); - -- Label resolved, clinit_barrier_slow; -+ Label resolved; - - Bytecodes::Code code = bytecode(); - switch (code) { -@@ -2321,10 +2321,6 @@ void TemplateTable::resolve_cache_and_index(int byte_no, - __ mv(t0, (int) code); - __ beq(temp, t0, resolved); - -- // resolve first time through -- // Class initialization barrier slow path lands here as well. -- __ bind(clinit_barrier_slow); -- - address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); - __ mv(temp, (int) code); - __ call_VM(noreg, entry, temp); -@@ -2334,13 +2330,6 @@ void TemplateTable::resolve_cache_and_index(int byte_no, - // n.b. unlike x86 Rcache is now rcpool plus the indexed offset - // so all clients ofthis method must be modified accordingly - __ bind(resolved); -- -- // Class initialization barrier for static methods -- if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { -- __ load_resolved_method_at_index(byte_no, temp, Rcache); -- __ load_method_holder(temp, temp); -- __ clinit_barrier(temp, t0, NULL, &clinit_barrier_slow); -- } - } - - // The Rcache and index registers must be set before call -@@ -3431,7 +3420,9 @@ void TemplateTable::invokeinterface(int byte_no) { - __ profile_virtual_call(x13, x30, x9); - - // Get declaring interface class from method, and itable index -- __ load_method_holder(x10, xmethod); -+ __ ld(x10, Address(xmethod, Method::const_offset())); -+ __ ld(x10, Address(x10, ConstMethod::constants_offset())); -+ __ ld(x10, Address(x10, ConstantPool::pool_holder_offset_in_bytes())); - __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); - __ subw(xmethod, xmethod, Method::itable_index_max); - __ negw(xmethod, xmethod); - -From c259a42eac0a11e080d28dabe7f745ee79a53663 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 18:36:13 +0800 -Subject: [PATCH 089/140] Revert JDK-8268119: Rename copy_os_cpu.inline.hpp - files to copy_os_cpu.hpp && JDK-8142362: Lots of code duplication in Copy - class - ---- - src/hotspot/cpu/riscv/copy_riscv.hpp | 85 +----------- - .../os_cpu/linux_riscv/copy_linux_riscv.hpp | 31 ----- - .../linux_riscv/copy_linux_riscv.inline.hpp | 124 ++++++++++++++++++ - 3 files changed, 128 insertions(+), 112 deletions(-) - delete mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp - -diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp -index bceadcc5dcc..05da242e354 100644 ---- a/src/hotspot/cpu/riscv/copy_riscv.hpp -+++ b/src/hotspot/cpu/riscv/copy_riscv.hpp -@@ -27,7 +27,10 @@ - #ifndef 
CPU_RISCV_COPY_RISCV_HPP - #define CPU_RISCV_COPY_RISCV_HPP - --#include OS_CPU_HEADER(copy) -+// Inline functions for memory copy and fill. -+ -+// Contains inline asm implementations -+#include OS_CPU_HEADER_INLINE(copy) - - static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { - julong* to = (julong*) tohw; -@@ -53,84 +56,4 @@ static void pd_zero_to_bytes(void* to, size_t count) { - (void)memset(to, 0, count); - } - --static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -- (void)memmove(to, from, count * HeapWordSize); --} -- --static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -- switch (count) { -- case 8: to[7] = from[7]; // fall through -- case 7: to[6] = from[6]; // fall through -- case 6: to[5] = from[5]; // fall through -- case 5: to[4] = from[4]; // fall through -- case 4: to[3] = from[3]; // fall through -- case 3: to[2] = from[2]; // fall through -- case 2: to[1] = from[1]; // fall through -- case 1: to[0] = from[0]; // fall through -- case 0: break; -- default: -- memcpy(to, from, count * HeapWordSize); -- break; -- } --} -- --static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { -- shared_disjoint_words_atomic(from, to, count); --} -- --static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -- pd_conjoint_words(from, to, count); --} -- --static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -- pd_disjoint_words(from, to, count); --} -- --static void pd_conjoint_bytes(const void* from, void* to, size_t count) { -- (void)memmove(to, from, count); --} -- --static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { -- pd_conjoint_bytes(from, to, count); --} -- --static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { -- _Copy_conjoint_jshorts_atomic(from, to, count); --} -- --static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { -- _Copy_conjoint_jints_atomic(from, to, count); --} -- --static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { -- _Copy_conjoint_jlongs_atomic(from, to, count); --} -- --static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { -- assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); -- _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); --} -- --static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { -- _Copy_arrayof_conjoint_bytes(from, to, count); --} -- --static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { -- _Copy_arrayof_conjoint_jshorts(from, to, count); --} -- --static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { -- _Copy_arrayof_conjoint_jints(from, to, count); --} -- --static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { -- _Copy_arrayof_conjoint_jlongs(from, to, count); --} -- --static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { -- assert(!UseCompressedOops, "foo!"); -- assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); -- _Copy_arrayof_conjoint_jlongs(from, to, count); --} -- - #endif // CPU_RISCV_COPY_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp -deleted file mode 100644 -index 
147cfdf3c10..00000000000 ---- a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp -+++ /dev/null -@@ -1,31 +0,0 @@ --/* -- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP --#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP -- --// Empty for build system -- --#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp -new file mode 100644 -index 00000000000..bdf36d6b4c3 ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp -@@ -0,0 +1,124 @@ -+/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP -+#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP -+ -+static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ (void)memmove(to, from, count * HeapWordSize); -+} -+ -+static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ switch (count) { -+ case 8: to[7] = from[7]; // fall through -+ case 7: to[6] = from[6]; // fall through -+ case 6: to[5] = from[5]; // fall through -+ case 5: to[4] = from[4]; // fall through -+ case 4: to[3] = from[3]; // fall through -+ case 3: to[2] = from[2]; // fall through -+ case 2: to[1] = from[1]; // fall through -+ case 1: to[0] = from[0]; // fall through -+ case 0: break; -+ default: -+ memcpy(to, from, count * HeapWordSize); -+ break; -+ } -+} -+ -+static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { -+ switch (count) { -+ case 8: to[7] = from[7]; -+ case 7: to[6] = from[6]; -+ case 6: to[5] = from[5]; -+ case 5: to[4] = from[4]; -+ case 4: to[3] = from[3]; -+ case 3: to[2] = from[2]; -+ case 2: to[1] = from[1]; -+ case 1: to[0] = from[0]; -+ case 0: break; -+ default: -+ while (count-- > 0) { -+ *to++ = *from++; -+ } -+ break; -+ } -+} -+ -+static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_conjoint_words(from, to, count); -+} -+ -+static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words(from, to, count); -+} -+ -+static void pd_conjoint_bytes(const void* from, void* to, size_t count) { -+ (void)memmove(to, from, count); -+} -+ -+static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { -+ pd_conjoint_bytes(from, to, count); -+} -+ -+static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { -+ _Copy_conjoint_jshorts_atomic(from, to, count); -+} -+ -+static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { -+ _Copy_conjoint_jints_atomic(from, to, count); -+} -+ -+static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { -+ _Copy_conjoint_jlongs_atomic(from, to, count); -+} -+ -+static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { -+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); -+ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); -+} -+ -+static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_bytes(from, to, count); -+} -+ -+static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jshorts(from, to, count); -+} -+ -+static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jints(from, to, count); -+} -+ -+static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jlongs(from, to, count); -+} -+ -+static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { -+ assert(!UseCompressedOops, "foo!"); -+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); -+ _Copy_arrayof_conjoint_jlongs(from, to, count); -+} -+ -+#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP - -From 6033e30ebd94f2315bf809a42ef00c85bdbc780e Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 19:33:21 +0800 -Subject: [PATCH 090/140] 
Revert JDK-8241436: C2: Factor out C2-specific code - from MacroAssembler - ---- - .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 1321 ----------------- - .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 141 -- - .../cpu/riscv/macroAssembler_riscv.cpp | 1282 ++++++++++++++++ - .../cpu/riscv/macroAssembler_riscv.hpp | 103 ++ - src/hotspot/cpu/riscv/riscv.ad | 124 +- - 5 files changed, 1447 insertions(+), 1524 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp - delete mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp - -diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp -deleted file mode 100644 -index 73f84a724ca..00000000000 ---- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp -+++ /dev/null -@@ -1,1321 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#include "precompiled.hpp" --#include "asm/assembler.hpp" --#include "asm/assembler.inline.hpp" --#include "opto/c2_MacroAssembler.hpp" --#include "opto/intrinsicnode.hpp" --#include "opto/subnode.hpp" --#include "runtime/stubRoutines.hpp" -- --#ifdef PRODUCT --#define BLOCK_COMMENT(str) /* nothing */ --#define STOP(error) stop(error) --#else --#define BLOCK_COMMENT(str) block_comment(str) --#define STOP(error) block_comment(error); stop(error) --#endif -- --#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") -- --// short string --// StringUTF16.indexOfChar --// StringLatin1.indexOfChar --void C2_MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, -- Register ch, Register result, -- bool isL) --{ -- Register ch1 = t0; -- Register index = t1; -- -- BLOCK_COMMENT("string_indexof_char_short {"); -- -- Label LOOP, LOOP1, LOOP4, LOOP8; -- Label MATCH, MATCH1, MATCH2, MATCH3, -- MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; -- -- mv(result, -1); -- mv(index, zr); -- -- bind(LOOP); -- addi(t0, index, 8); -- ble(t0, cnt1, LOOP8); -- addi(t0, index, 4); -- ble(t0, cnt1, LOOP4); -- j(LOOP1); -- -- bind(LOOP8); -- isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -- beq(ch, ch1, MATCH); -- isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -- beq(ch, ch1, MATCH1); -- isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -- beq(ch, ch1, MATCH2); -- isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -- beq(ch, ch1, MATCH3); -- isL ? 
lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); -- beq(ch, ch1, MATCH4); -- isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); -- beq(ch, ch1, MATCH5); -- isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); -- beq(ch, ch1, MATCH6); -- isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); -- beq(ch, ch1, MATCH7); -- addi(index, index, 8); -- addi(str1, str1, isL ? 8 : 16); -- blt(index, cnt1, LOOP); -- j(NOMATCH); -- -- bind(LOOP4); -- isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -- beq(ch, ch1, MATCH); -- isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -- beq(ch, ch1, MATCH1); -- isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -- beq(ch, ch1, MATCH2); -- isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -- beq(ch, ch1, MATCH3); -- addi(index, index, 4); -- addi(str1, str1, isL ? 4 : 8); -- bge(index, cnt1, NOMATCH); -- -- bind(LOOP1); -- isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); -- beq(ch, ch1, MATCH); -- addi(index, index, 1); -- addi(str1, str1, isL ? 1 : 2); -- blt(index, cnt1, LOOP1); -- j(NOMATCH); -- -- bind(MATCH1); -- addi(index, index, 1); -- j(MATCH); -- -- bind(MATCH2); -- addi(index, index, 2); -- j(MATCH); -- -- bind(MATCH3); -- addi(index, index, 3); -- j(MATCH); -- -- bind(MATCH4); -- addi(index, index, 4); -- j(MATCH); -- -- bind(MATCH5); -- addi(index, index, 5); -- j(MATCH); -- -- bind(MATCH6); -- addi(index, index, 6); -- j(MATCH); -- -- bind(MATCH7); -- addi(index, index, 7); -- -- bind(MATCH); -- mv(result, index); -- bind(NOMATCH); -- BLOCK_COMMENT("} string_indexof_char_short"); --} -- --// StringUTF16.indexOfChar --// StringLatin1.indexOfChar --void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, -- Register ch, Register result, -- Register tmp1, Register tmp2, -- Register tmp3, Register tmp4, -- bool isL) --{ -- Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; -- Register ch1 = t0; -- Register orig_cnt = t1; -- Register mask1 = tmp3; -- Register mask2 = tmp2; -- Register match_mask = tmp1; -- Register trailing_char = tmp4; -- Register unaligned_elems = tmp4; -- -- BLOCK_COMMENT("string_indexof_char {"); -- beqz(cnt1, NOMATCH); -- -- addi(t0, cnt1, isL ? -32 : -16); -- bgtz(t0, DO_LONG); -- string_indexof_char_short(str1, cnt1, ch, result, isL); -- j(DONE); -- -- bind(DO_LONG); -- mv(orig_cnt, cnt1); -- if (AvoidUnalignedAccesses) { -- Label ALIGNED; -- andi(unaligned_elems, str1, 0x7); -- beqz(unaligned_elems, ALIGNED); -- sub(unaligned_elems, unaligned_elems, 8); -- neg(unaligned_elems, unaligned_elems); -- if (!isL) { -- srli(unaligned_elems, unaligned_elems, 1); -- } -- // do unaligned part per element -- string_indexof_char_short(str1, unaligned_elems, ch, result, isL); -- bgez(result, DONE); -- mv(orig_cnt, cnt1); -- sub(cnt1, cnt1, unaligned_elems); -- bind(ALIGNED); -- } -- -- // duplicate ch -- if (isL) { -- slli(ch1, ch, 8); -- orr(ch, ch1, ch); -- } -- slli(ch1, ch, 16); -- orr(ch, ch1, ch); -- slli(ch1, ch, 32); -- orr(ch, ch1, ch); -- -- if (!isL) { -- slli(cnt1, cnt1, 1); -- } -- -- uint64_t mask0101 = UCONST64(0x0101010101010101); -- uint64_t mask0001 = UCONST64(0x0001000100010001); -- mv(mask1, isL ? mask0101 : mask0001); -- uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); -- uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); -- mv(mask2, isL ? 
mask7f7f : mask7fff); -- -- bind(CH1_LOOP); -- ld(ch1, Address(str1)); -- addi(str1, str1, 8); -- addi(cnt1, cnt1, -8); -- compute_match_mask(ch1, ch, match_mask, mask1, mask2); -- bnez(match_mask, HIT); -- bgtz(cnt1, CH1_LOOP); -- j(NOMATCH); -- -- bind(HIT); -- ctzc_bit(trailing_char, match_mask, isL, ch1, result); -- srli(trailing_char, trailing_char, 3); -- addi(cnt1, cnt1, 8); -- ble(cnt1, trailing_char, NOMATCH); -- // match case -- if (!isL) { -- srli(cnt1, cnt1, 1); -- srli(trailing_char, trailing_char, 1); -- } -- -- sub(result, orig_cnt, cnt1); -- add(result, result, trailing_char); -- j(DONE); -- -- bind(NOMATCH); -- mv(result, -1); -- -- bind(DONE); -- BLOCK_COMMENT("} string_indexof_char"); --} -- --typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); -- --// Search for needle in haystack and return index or -1 --// x10: result --// x11: haystack --// x12: haystack_len --// x13: needle --// x14: needle_len --void C2_MacroAssembler::string_indexof(Register haystack, Register needle, -- Register haystack_len, Register needle_len, -- Register tmp1, Register tmp2, -- Register tmp3, Register tmp4, -- Register tmp5, Register tmp6, -- Register result, int ae) --{ -- assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); -- -- Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; -- -- Register ch1 = t0; -- Register ch2 = t1; -- Register nlen_tmp = tmp1; // needle len tmp -- Register hlen_tmp = tmp2; // haystack len tmp -- Register result_tmp = tmp4; -- -- bool isLL = ae == StrIntrinsicNode::LL; -- -- bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -- bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -- int needle_chr_shift = needle_isL ? 0 : 1; -- int haystack_chr_shift = haystack_isL ? 0 : 1; -- int needle_chr_size = needle_isL ? 1 : 2; -- int haystack_chr_size = haystack_isL ? 1 : 2; -- load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -- (load_chr_insn)&MacroAssembler::lhu; -- load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -- (load_chr_insn)&MacroAssembler::lhu; -- -- BLOCK_COMMENT("string_indexof {"); -- -- // Note, inline_string_indexOf() generates checks: -- // if (pattern.count > src.count) return -1; -- // if (pattern.count == 0) return 0; -- -- // We have two strings, a source string in haystack, haystack_len and a pattern string -- // in needle, needle_len. Find the first occurence of pattern in source or return -1. -- -- // For larger pattern and source we use a simplified Boyer Moore algorithm. -- // With a small pattern and source we use linear scan. -- -- // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. -- sub(result_tmp, haystack_len, needle_len); -- // needle_len < 8, use linear scan -- sub(t0, needle_len, 8); -- bltz(t0, LINEARSEARCH); -- // needle_len >= 256, use linear scan -- sub(t0, needle_len, 256); -- bgez(t0, LINEARSTUB); -- // needle_len >= haystack_len/4, use linear scan -- srli(t0, haystack_len, 2); -- bge(needle_len, t0, LINEARSTUB); -- -- // Boyer-Moore-Horspool introduction: -- // The Boyer Moore alogorithm is based on the description here:- -- // -- // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm -- // -- // This describes and algorithm with 2 shift rules. The 'Bad Character' rule -- // and the 'Good Suffix' rule. 
-- // -- // These rules are essentially heuristics for how far we can shift the -- // pattern along the search string. -- // -- // The implementation here uses the 'Bad Character' rule only because of the -- // complexity of initialisation for the 'Good Suffix' rule. -- // -- // This is also known as the Boyer-Moore-Horspool algorithm: -- // -- // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm -- // -- // #define ASIZE 256 -- // -- // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { -- // int i, j; -- // unsigned c; -- // unsigned char bc[ASIZE]; -- // -- // /* Preprocessing */ -- // for (i = 0; i < ASIZE; ++i) -- // bc[i] = m; -- // for (i = 0; i < m - 1; ) { -- // c = pattern[i]; -- // ++i; -- // // c < 256 for Latin1 string, so, no need for branch -- // #ifdef PATTERN_STRING_IS_LATIN1 -- // bc[c] = m - i; -- // #else -- // if (c < ASIZE) bc[c] = m - i; -- // #endif -- // } -- // -- // /* Searching */ -- // j = 0; -- // while (j <= n - m) { -- // c = src[i+j]; -- // if (pattern[m-1] == c) -- // int k; -- // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -- // if (k < 0) return j; -- // // c < 256 for Latin1 string, so, no need for branch -- // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 -- // // LL case: (c< 256) always true. Remove branch -- // j += bc[pattern[j+m-1]]; -- // #endif -- // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF -- // // UU case: need if (c if not. -- // if (c < ASIZE) -- // j += bc[pattern[j+m-1]]; -- // else -- // j += m -- // #endif -- // } -- // return -1; -- // } -- -- // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result -- Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, -- BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; -- -- Register haystack_end = haystack_len; -- Register skipch = tmp2; -- -- // pattern length is >=8, so, we can read at least 1 register for cases when -- // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for -- // UL case. We'll re-read last character in inner pre-loop code to have -- // single outer pre-loop load -- const int firstStep = isLL ? 
7 : 3; -- -- const int ASIZE = 256; -- const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) -- -- sub(sp, sp, ASIZE); -- -- // init BC offset table with default value: needle_len -- slli(t0, needle_len, 8); -- orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] -- slli(tmp1, t0, 16); -- orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] -- slli(tmp1, t0, 32); -- orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] -- -- mv(ch1, sp); // ch1 is t0 -- mv(tmp6, ASIZE / STORE_BYTES); // loop iterations -- -- bind(BM_INIT_LOOP); -- // for (i = 0; i < ASIZE; ++i) -- // bc[i] = m; -- for (int i = 0; i < 4; i++) { -- sd(tmp5, Address(ch1, i * wordSize)); -- } -- add(ch1, ch1, 32); -- sub(tmp6, tmp6, 4); -- bgtz(tmp6, BM_INIT_LOOP); -- -- sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern -- Register orig_haystack = tmp5; -- mv(orig_haystack, haystack); -- // result_tmp = tmp4 -- shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); -- sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 -- mv(tmp3, needle); -- -- // for (i = 0; i < m - 1; ) { -- // c = pattern[i]; -- // ++i; -- // // c < 256 for Latin1 string, so, no need for branch -- // #ifdef PATTERN_STRING_IS_LATIN1 -- // bc[c] = m - i; -- // #else -- // if (c < ASIZE) bc[c] = m - i; -- // #endif -- // } -- bind(BCLOOP); -- (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); -- add(tmp3, tmp3, needle_chr_size); -- if (!needle_isL) { -- // ae == StrIntrinsicNode::UU -- mv(tmp6, ASIZE); -- bgeu(ch1, tmp6, BCSKIP); -- } -- add(tmp4, sp, ch1); -- sb(ch2, Address(tmp4)); // store skip offset to BC offset table -- -- bind(BCSKIP); -- sub(ch2, ch2, 1); // for next pattern element, skip distance -1 -- bgtz(ch2, BCLOOP); -- -- // tmp6: pattern end, address after needle -- shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); -- if (needle_isL == haystack_isL) { -- // load last 8 bytes (8LL/4UU symbols) -- ld(tmp6, Address(tmp6, -wordSize)); -- } else { -- // UL: from UTF-16(source) search Latin1(pattern) -- lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) -- // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d -- // We'll have to wait until load completed, but it's still faster than per-character loads+checks -- srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a -- slli(ch2, tmp6, XLEN - 24); -- srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b -- slli(ch1, tmp6, XLEN - 16); -- srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c -- andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d -- slli(ch2, ch2, 16); -- orr(ch2, ch2, ch1); // 0x00000b0c -- slli(result, tmp3, 48); // use result as temp register -- orr(tmp6, tmp6, result); // 0x0a00000d -- slli(result, ch2, 16); -- orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d -- } -- -- // i = m - 1; -- // skipch = j + i; -- // if (skipch == pattern[m - 1] -- // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -- // else -- // move j with bad char offset table -- bind(BMLOOPSTR2); -- // compare pattern to source string backward -- shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); -- (this->*haystack_load_1chr)(skipch, Address(result), noreg); -- sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 -- if (needle_isL == haystack_isL) { -- // re-init tmp3. It's for free because it's executed in parallel with -- // load above. 
Alternative is to initialize it before loop, but it'll -- // affect performance on in-order systems with 2 or more ld/st pipelines -- srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] -- } -- if (!isLL) { // UU/UL case -- slli(ch2, nlen_tmp, 1); // offsets in bytes -- } -- bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char -- add(result, haystack, isLL ? nlen_tmp : ch2); -- ld(ch2, Address(result)); // load 8 bytes from source string -- mv(ch1, tmp6); -- if (isLL) { -- j(BMLOOPSTR1_AFTER_LOAD); -- } else { -- sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 -- j(BMLOOPSTR1_CMP); -- } -- -- bind(BMLOOPSTR1); -- shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); -- (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -- shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); -- (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -- -- bind(BMLOOPSTR1_AFTER_LOAD); -- sub(nlen_tmp, nlen_tmp, 1); -- bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); -- -- bind(BMLOOPSTR1_CMP); -- beq(ch1, ch2, BMLOOPSTR1); -- -- bind(BMSKIP); -- if (!isLL) { -- // if we've met UTF symbol while searching Latin1 pattern, then we can -- // skip needle_len symbols -- if (needle_isL != haystack_isL) { -- mv(result_tmp, needle_len); -- } else { -- mv(result_tmp, 1); -- } -- mv(t0, ASIZE); -- bgeu(skipch, t0, BMADV); -- } -- add(result_tmp, sp, skipch); -- lbu(result_tmp, Address(result_tmp)); // load skip offset -- -- bind(BMADV); -- sub(nlen_tmp, needle_len, 1); -- // move haystack after bad char skip offset -- shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); -- ble(haystack, haystack_end, BMLOOPSTR2); -- add(sp, sp, ASIZE); -- j(NOMATCH); -- -- bind(BMLOOPSTR1_LASTCMP); -- bne(ch1, ch2, BMSKIP); -- -- bind(BMMATCH); -- sub(result, haystack, orig_haystack); -- if (!haystack_isL) { -- srli(result, result, 1); -- } -- add(sp, sp, ASIZE); -- j(DONE); -- -- bind(LINEARSTUB); -- sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm -- bltz(t0, LINEARSEARCH); -- mv(result, zr); -- RuntimeAddress stub = NULL; -- if (isLL) { -- stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); -- assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); -- } else if (needle_isL) { -- stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); -- assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); -- } else { -- stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); -- assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); -- } -- trampoline_call(stub); -- j(DONE); -- -- bind(NOMATCH); -- mv(result, -1); -- j(DONE); -- -- bind(LINEARSEARCH); -- string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); -- -- bind(DONE); -- BLOCK_COMMENT("} string_indexof"); --} -- --// string_indexof --// result: x10 --// src: x11 --// src_count: x12 --// pattern: x13 --// pattern_count: x14 or 1/2/3/4 --void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, -- Register haystack_len, Register needle_len, -- Register tmp1, Register tmp2, -- Register tmp3, Register tmp4, -- int needle_con_cnt, Register result, int ae) --{ -- // Note: -- // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant -- // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 -- assert(needle_con_cnt <= 4, "Invalid 
needle constant count"); -- assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); -- -- Register ch1 = t0; -- Register ch2 = t1; -- Register hlen_neg = haystack_len, nlen_neg = needle_len; -- Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; -- -- bool isLL = ae == StrIntrinsicNode::LL; -- -- bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -- bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -- int needle_chr_shift = needle_isL ? 0 : 1; -- int haystack_chr_shift = haystack_isL ? 0 : 1; -- int needle_chr_size = needle_isL ? 1 : 2; -- int haystack_chr_size = haystack_isL ? 1 : 2; -- -- load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -- (load_chr_insn)&MacroAssembler::lhu; -- load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -- (load_chr_insn)&MacroAssembler::lhu; -- load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; -- load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; -- -- Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; -- -- Register first = tmp3; -- -- if (needle_con_cnt == -1) { -- Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; -- -- sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); -- bltz(t0, DOSHORT); -- -- (this->*needle_load_1chr)(first, Address(needle), noreg); -- slli(t0, needle_len, needle_chr_shift); -- add(needle, needle, t0); -- neg(nlen_neg, t0); -- slli(t0, result_tmp, haystack_chr_shift); -- add(haystack, haystack, t0); -- neg(hlen_neg, t0); -- -- bind(FIRST_LOOP); -- add(t0, haystack, hlen_neg); -- (this->*haystack_load_1chr)(ch2, Address(t0), noreg); -- beq(first, ch2, STR1_LOOP); -- -- bind(STR2_NEXT); -- add(hlen_neg, hlen_neg, haystack_chr_size); -- blez(hlen_neg, FIRST_LOOP); -- j(NOMATCH); -- -- bind(STR1_LOOP); -- add(nlen_tmp, nlen_neg, needle_chr_size); -- add(hlen_tmp, hlen_neg, haystack_chr_size); -- bgez(nlen_tmp, MATCH); -- -- bind(STR1_NEXT); -- add(ch1, needle, nlen_tmp); -- (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -- add(ch2, haystack, hlen_tmp); -- (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -- bne(ch1, ch2, STR2_NEXT); -- add(nlen_tmp, nlen_tmp, needle_chr_size); -- add(hlen_tmp, hlen_tmp, haystack_chr_size); -- bltz(nlen_tmp, STR1_NEXT); -- j(MATCH); -- -- bind(DOSHORT); -- if (needle_isL == haystack_isL) { -- sub(t0, needle_len, 2); -- bltz(t0, DO1); -- bgtz(t0, DO3); -- } -- } -- -- if (needle_con_cnt == 4) { -- Label CH1_LOOP; -- (this->*load_4chr)(ch1, Address(needle), noreg); -- sub(result_tmp, haystack_len, 4); -- slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp -- add(haystack, haystack, tmp3); -- neg(hlen_neg, tmp3); -- -- bind(CH1_LOOP); -- add(ch2, haystack, hlen_neg); -- (this->*load_4chr)(ch2, Address(ch2), noreg); -- beq(ch1, ch2, MATCH); -- add(hlen_neg, hlen_neg, haystack_chr_size); -- blez(hlen_neg, CH1_LOOP); -- j(NOMATCH); -- } -- -- if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { -- Label CH1_LOOP; -- BLOCK_COMMENT("string_indexof DO2 {"); -- bind(DO2); -- (this->*load_2chr)(ch1, Address(needle), noreg); -- if (needle_con_cnt == 2) { -- sub(result_tmp, haystack_len, 2); -- } -- slli(tmp3, result_tmp, haystack_chr_shift); -- add(haystack, haystack, tmp3); -- neg(hlen_neg, tmp3); -- -- bind(CH1_LOOP); -- add(tmp3, haystack, hlen_neg); -- (this->*load_2chr)(ch2, Address(tmp3), noreg); -- beq(ch1, ch2, 
MATCH); -- add(hlen_neg, hlen_neg, haystack_chr_size); -- blez(hlen_neg, CH1_LOOP); -- j(NOMATCH); -- BLOCK_COMMENT("} string_indexof DO2"); -- } -- -- if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { -- Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; -- BLOCK_COMMENT("string_indexof DO3 {"); -- -- bind(DO3); -- (this->*load_2chr)(first, Address(needle), noreg); -- (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); -- if (needle_con_cnt == 3) { -- sub(result_tmp, haystack_len, 3); -- } -- slli(hlen_tmp, result_tmp, haystack_chr_shift); -- add(haystack, haystack, hlen_tmp); -- neg(hlen_neg, hlen_tmp); -- -- bind(FIRST_LOOP); -- add(ch2, haystack, hlen_neg); -- (this->*load_2chr)(ch2, Address(ch2), noreg); -- beq(first, ch2, STR1_LOOP); -- -- bind(STR2_NEXT); -- add(hlen_neg, hlen_neg, haystack_chr_size); -- blez(hlen_neg, FIRST_LOOP); -- j(NOMATCH); -- -- bind(STR1_LOOP); -- add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); -- add(ch2, haystack, hlen_tmp); -- (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -- bne(ch1, ch2, STR2_NEXT); -- j(MATCH); -- BLOCK_COMMENT("} string_indexof DO3"); -- } -- -- if (needle_con_cnt == -1 || needle_con_cnt == 1) { -- Label DO1_LOOP; -- -- BLOCK_COMMENT("string_indexof DO1 {"); -- bind(DO1); -- (this->*needle_load_1chr)(ch1, Address(needle), noreg); -- sub(result_tmp, haystack_len, 1); -- mv(tmp3, result_tmp); -- if (haystack_chr_shift) { -- slli(tmp3, result_tmp, haystack_chr_shift); -- } -- add(haystack, haystack, tmp3); -- neg(hlen_neg, tmp3); -- -- bind(DO1_LOOP); -- add(tmp3, haystack, hlen_neg); -- (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); -- beq(ch1, ch2, MATCH); -- add(hlen_neg, hlen_neg, haystack_chr_size); -- blez(hlen_neg, DO1_LOOP); -- BLOCK_COMMENT("} string_indexof DO1"); -- } -- -- bind(NOMATCH); -- mv(result, -1); -- j(DONE); -- -- bind(MATCH); -- srai(t0, hlen_neg, haystack_chr_shift); -- add(result, result_tmp, t0); -- -- bind(DONE); --} -- --// Compare strings. --void C2_MacroAssembler::string_compare(Register str1, Register str2, -- Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -- Register tmp3, int ae) --{ -- Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, -- DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, -- SHORT_LOOP_START, TAIL_CHECK, L; -- -- const int STUB_THRESHOLD = 64 + 8; -- bool isLL = ae == StrIntrinsicNode::LL; -- bool isLU = ae == StrIntrinsicNode::LU; -- bool isUL = ae == StrIntrinsicNode::UL; -- -- bool str1_isL = isLL || isLU; -- bool str2_isL = isLL || isUL; -- -- // for L strings, 1 byte for 1 character -- // for U strings, 2 bytes for 1 character -- int str1_chr_size = str1_isL ? 1 : 2; -- int str2_chr_size = str2_isL ? 1 : 2; -- int minCharsInWord = isLL ? wordSize : wordSize / 2; -- -- load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -- load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -- -- BLOCK_COMMENT("string_compare {"); -- -- // Bizzarely, the counts are passed in bytes, regardless of whether they -- // are L or U strings, however the result is always in characters. -- if (!str1_isL) { -- sraiw(cnt1, cnt1, 1); -- } -- if (!str2_isL) { -- sraiw(cnt2, cnt2, 1); -- } -- -- // Compute the minimum of the string lengths and save the difference in result. 
-- sub(result, cnt1, cnt2); -- bgt(cnt1, cnt2, L); -- mv(cnt2, cnt1); -- bind(L); -- -- // A very short string -- li(t0, minCharsInWord); -- ble(cnt2, t0, SHORT_STRING); -- -- // Compare longwords -- // load first parts of strings and finish initialization while loading -- { -- if (str1_isL == str2_isL) { // LL or UU -- // load 8 bytes once to compare -- ld(tmp1, Address(str1)); -- beq(str1, str2, DONE); -- ld(tmp2, Address(str2)); -- li(t0, STUB_THRESHOLD); -- bge(cnt2, t0, STUB); -- sub(cnt2, cnt2, minCharsInWord); -- beqz(cnt2, TAIL_CHECK); -- // convert cnt2 from characters to bytes -- if (!str1_isL) { -- slli(cnt2, cnt2, 1); -- } -- add(str2, str2, cnt2); -- add(str1, str1, cnt2); -- sub(cnt2, zr, cnt2); -- } else if (isLU) { // LU case -- lwu(tmp1, Address(str1)); -- ld(tmp2, Address(str2)); -- li(t0, STUB_THRESHOLD); -- bge(cnt2, t0, STUB); -- addi(cnt2, cnt2, -4); -- add(str1, str1, cnt2); -- sub(cnt1, zr, cnt2); -- slli(cnt2, cnt2, 1); -- add(str2, str2, cnt2); -- inflate_lo32(tmp3, tmp1); -- mv(tmp1, tmp3); -- sub(cnt2, zr, cnt2); -- addi(cnt1, cnt1, 4); -- } else { // UL case -- ld(tmp1, Address(str1)); -- lwu(tmp2, Address(str2)); -- li(t0, STUB_THRESHOLD); -- bge(cnt2, t0, STUB); -- addi(cnt2, cnt2, -4); -- slli(t0, cnt2, 1); -- sub(cnt1, zr, t0); -- add(str1, str1, t0); -- add(str2, str2, cnt2); -- inflate_lo32(tmp3, tmp2); -- mv(tmp2, tmp3); -- sub(cnt2, zr, cnt2); -- addi(cnt1, cnt1, 8); -- } -- addi(cnt2, cnt2, isUL ? 4 : 8); -- bgez(cnt2, TAIL); -- xorr(tmp3, tmp1, tmp2); -- bnez(tmp3, DIFFERENCE); -- -- // main loop -- bind(NEXT_WORD); -- if (str1_isL == str2_isL) { // LL or UU -- add(t0, str1, cnt2); -- ld(tmp1, Address(t0)); -- add(t0, str2, cnt2); -- ld(tmp2, Address(t0)); -- addi(cnt2, cnt2, 8); -- } else if (isLU) { // LU case -- add(t0, str1, cnt1); -- lwu(tmp1, Address(t0)); -- add(t0, str2, cnt2); -- ld(tmp2, Address(t0)); -- addi(cnt1, cnt1, 4); -- inflate_lo32(tmp3, tmp1); -- mv(tmp1, tmp3); -- addi(cnt2, cnt2, 8); -- } else { // UL case -- add(t0, str2, cnt2); -- lwu(tmp2, Address(t0)); -- add(t0, str1, cnt1); -- ld(tmp1, Address(t0)); -- inflate_lo32(tmp3, tmp2); -- mv(tmp2, tmp3); -- addi(cnt1, cnt1, 8); -- addi(cnt2, cnt2, 4); -- } -- bgez(cnt2, TAIL); -- -- xorr(tmp3, tmp1, tmp2); -- beqz(tmp3, NEXT_WORD); -- j(DIFFERENCE); -- bind(TAIL); -- xorr(tmp3, tmp1, tmp2); -- bnez(tmp3, DIFFERENCE); -- // Last longword. In the case where length == 4 we compare the -- // same longword twice, but that's still faster than another -- // conditional branch. -- if (str1_isL == str2_isL) { // LL or UU -- ld(tmp1, Address(str1)); -- ld(tmp2, Address(str2)); -- } else if (isLU) { // LU case -- lwu(tmp1, Address(str1)); -- ld(tmp2, Address(str2)); -- inflate_lo32(tmp3, tmp1); -- mv(tmp1, tmp3); -- } else { // UL case -- lwu(tmp2, Address(str2)); -- ld(tmp1, Address(str1)); -- inflate_lo32(tmp3, tmp2); -- mv(tmp2, tmp3); -- } -- bind(TAIL_CHECK); -- xorr(tmp3, tmp1, tmp2); -- beqz(tmp3, DONE); -- -- // Find the first different characters in the longwords and -- // compute their difference. 
-- bind(DIFFERENCE); -- ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb -- srl(tmp1, tmp1, result); -- srl(tmp2, tmp2, result); -- if (isLL) { -- andi(tmp1, tmp1, 0xFF); -- andi(tmp2, tmp2, 0xFF); -- } else { -- andi(tmp1, tmp1, 0xFFFF); -- andi(tmp2, tmp2, 0xFFFF); -- } -- sub(result, tmp1, tmp2); -- j(DONE); -- } -- -- bind(STUB); -- RuntimeAddress stub = NULL; -- switch (ae) { -- case StrIntrinsicNode::LL: -- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); -- break; -- case StrIntrinsicNode::UU: -- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); -- break; -- case StrIntrinsicNode::LU: -- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); -- break; -- case StrIntrinsicNode::UL: -- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); -- break; -- default: -- ShouldNotReachHere(); -- } -- assert(stub.target() != NULL, "compare_long_string stub has not been generated"); -- trampoline_call(stub); -- j(DONE); -- -- bind(SHORT_STRING); -- // Is the minimum length zero? -- beqz(cnt2, DONE); -- // arrange code to do most branches while loading and loading next characters -- // while comparing previous -- (this->*str1_load_chr)(tmp1, Address(str1), t0); -- addi(str1, str1, str1_chr_size); -- addi(cnt2, cnt2, -1); -- beqz(cnt2, SHORT_LAST_INIT); -- (this->*str2_load_chr)(cnt1, Address(str2), t0); -- addi(str2, str2, str2_chr_size); -- j(SHORT_LOOP_START); -- bind(SHORT_LOOP); -- addi(cnt2, cnt2, -1); -- beqz(cnt2, SHORT_LAST); -- bind(SHORT_LOOP_START); -- (this->*str1_load_chr)(tmp2, Address(str1), t0); -- addi(str1, str1, str1_chr_size); -- (this->*str2_load_chr)(t0, Address(str2), t0); -- addi(str2, str2, str2_chr_size); -- bne(tmp1, cnt1, SHORT_LOOP_TAIL); -- addi(cnt2, cnt2, -1); -- beqz(cnt2, SHORT_LAST2); -- (this->*str1_load_chr)(tmp1, Address(str1), t0); -- addi(str1, str1, str1_chr_size); -- (this->*str2_load_chr)(cnt1, Address(str2), t0); -- addi(str2, str2, str2_chr_size); -- beq(tmp2, t0, SHORT_LOOP); -- sub(result, tmp2, t0); -- j(DONE); -- bind(SHORT_LOOP_TAIL); -- sub(result, tmp1, cnt1); -- j(DONE); -- bind(SHORT_LAST2); -- beq(tmp2, t0, DONE); -- sub(result, tmp2, t0); -- -- j(DONE); -- bind(SHORT_LAST_INIT); -- (this->*str2_load_chr)(cnt1, Address(str2), t0); -- addi(str2, str2, str2_chr_size); -- bind(SHORT_LAST); -- beq(tmp1, cnt1, DONE); -- sub(result, tmp1, cnt1); -- -- bind(DONE); -- -- BLOCK_COMMENT("} string_compare"); --} -- --void C2_MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, -- Register tmp4, Register tmp5, Register tmp6, Register result, -- Register cnt1, int elem_size) { -- Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; -- Register tmp1 = t0; -- Register tmp2 = t1; -- Register cnt2 = tmp2; // cnt2 only used in array length compare -- Register elem_per_word = tmp6; -- int log_elem_size = exact_log2(elem_size); -- int length_offset = arrayOopDesc::length_offset_in_bytes(); -- int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); -- -- assert(elem_size == 1 || elem_size == 2, "must be char or byte"); -- assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); -- li(elem_per_word, wordSize / elem_size); -- -- BLOCK_COMMENT("arrays_equals {"); -- -- // if (a1 == a2), return true -- beq(a1, a2, SAME); -- -- mv(result, false); -- beqz(a1, DONE); -- beqz(a2, DONE); -- lwu(cnt1, Address(a1, length_offset)); -- lwu(cnt2, Address(a2, length_offset)); -- bne(cnt2, cnt1, DONE); -- beqz(cnt1, SAME); -- -- slli(tmp5, cnt1, 3 + log_elem_size); -- sub(tmp5, zr, tmp5); -- add(a1, a1, base_offset); -- add(a2, a2, base_offset); -- ld(tmp3, Address(a1, 0)); -- ld(tmp4, Address(a2, 0)); -- ble(cnt1, elem_per_word, SHORT); // short or same -- -- // Main 16 byte comparison loop with 2 exits -- bind(NEXT_DWORD); { -- ld(tmp1, Address(a1, wordSize)); -- ld(tmp2, Address(a2, wordSize)); -- sub(cnt1, cnt1, 2 * wordSize / elem_size); -- blez(cnt1, TAIL); -- bne(tmp3, tmp4, DONE); -- ld(tmp3, Address(a1, 2 * wordSize)); -- ld(tmp4, Address(a2, 2 * wordSize)); -- add(a1, a1, 2 * wordSize); -- add(a2, a2, 2 * wordSize); -- ble(cnt1, elem_per_word, TAIL2); -- } beq(tmp1, tmp2, NEXT_DWORD); -- j(DONE); -- -- bind(TAIL); -- xorr(tmp4, tmp3, tmp4); -- xorr(tmp2, tmp1, tmp2); -- sll(tmp2, tmp2, tmp5); -- orr(tmp5, tmp4, tmp2); -- j(IS_TMP5_ZR); -- -- bind(TAIL2); -- bne(tmp1, tmp2, DONE); -- -- bind(SHORT); -- xorr(tmp4, tmp3, tmp4); -- sll(tmp5, tmp4, tmp5); -- -- bind(IS_TMP5_ZR); -- bnez(tmp5, DONE); -- -- bind(SAME); -- mv(result, true); -- // That's it. -- bind(DONE); -- -- BLOCK_COMMENT("} array_equals"); --} -- --// Compare Strings -- --// For Strings we're passed the address of the first characters in a1 --// and a2 and the length in cnt1. --// elem_size is the element size in bytes: either 1 or 2. --// There are two implementations. For arrays >= 8 bytes, all --// comparisons (including the final one, which may overlap) are --// performed 8 bytes at a time. For strings < 8 bytes, we compare a --// halfword, then a short, and then a byte. -- --void C2_MacroAssembler::string_equals(Register a1, Register a2, -- Register result, Register cnt1, int elem_size) --{ -- Label SAME, DONE, SHORT, NEXT_WORD; -- Register tmp1 = t0; -- Register tmp2 = t1; -- -- assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); -- assert_different_registers(a1, a2, result, cnt1, t0, t1); -- -- BLOCK_COMMENT("string_equals {"); -- -- mv(result, false); -- -- // Check for short strings, i.e. smaller than wordSize. -- sub(cnt1, cnt1, wordSize); -- bltz(cnt1, SHORT); -- -- // Main 8 byte comparison loop. -- bind(NEXT_WORD); { -- ld(tmp1, Address(a1, 0)); -- add(a1, a1, wordSize); -- ld(tmp2, Address(a2, 0)); -- add(a2, a2, wordSize); -- sub(cnt1, cnt1, wordSize); -- bne(tmp1, tmp2, DONE); -- } bgtz(cnt1, NEXT_WORD); -- -- // Last longword. In the case where length == 4 we compare the -- // same longword twice, but that's still faster than another -- // conditional branch. -- // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when -- // length == 4. -- add(tmp1, a1, cnt1); -- ld(tmp1, Address(tmp1, 0)); -- add(tmp2, a2, cnt1); -- ld(tmp2, Address(tmp2, 0)); -- bne(tmp1, tmp2, DONE); -- j(SAME); -- -- bind(SHORT); -- Label TAIL03, TAIL01; -- -- // 0-7 bytes left. -- andi(t0, cnt1, 4); -- beqz(t0, TAIL03); -- { -- lwu(tmp1, Address(a1, 0)); -- add(a1, a1, 4); -- lwu(tmp2, Address(a2, 0)); -- add(a2, a2, 4); -- bne(tmp1, tmp2, DONE); -- } -- -- bind(TAIL03); -- // 0-3 bytes left. 
-- andi(t0, cnt1, 2); -- beqz(t0, TAIL01); -- { -- lhu(tmp1, Address(a1, 0)); -- add(a1, a1, 2); -- lhu(tmp2, Address(a2, 0)); -- add(a2, a2, 2); -- bne(tmp1, tmp2, DONE); -- } -- -- bind(TAIL01); -- if (elem_size == 1) { // Only needed when comparing 1-byte elements -- // 0-1 bytes left. -- andi(t0, cnt1, 1); -- beqz(t0, SAME); -- { -- lbu(tmp1, a1, 0); -- lbu(tmp2, a2, 0); -- bne(tmp1, tmp2, DONE); -- } -- } -- -- // Arrays are equal. -- bind(SAME); -- mv(result, true); -- -- // That's it. -- bind(DONE); -- BLOCK_COMMENT("} string_equals"); --} -- --typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); --typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, -- bool is_far, bool is_unordered); -- --static conditional_branch_insn conditional_branches[] = --{ -- /* SHORT branches */ -- (conditional_branch_insn)&Assembler::beq, -- (conditional_branch_insn)&Assembler::bgt, -- NULL, // BoolTest::overflow -- (conditional_branch_insn)&Assembler::blt, -- (conditional_branch_insn)&Assembler::bne, -- (conditional_branch_insn)&Assembler::ble, -- NULL, // BoolTest::no_overflow -- (conditional_branch_insn)&Assembler::bge, -- -- /* UNSIGNED branches */ -- (conditional_branch_insn)&Assembler::beq, -- (conditional_branch_insn)&Assembler::bgtu, -- NULL, -- (conditional_branch_insn)&Assembler::bltu, -- (conditional_branch_insn)&Assembler::bne, -- (conditional_branch_insn)&Assembler::bleu, -- NULL, -- (conditional_branch_insn)&Assembler::bgeu --}; -- --static float_conditional_branch_insn float_conditional_branches[] = --{ -- /* FLOAT SHORT branches */ -- (float_conditional_branch_insn)&MacroAssembler::float_beq, -- (float_conditional_branch_insn)&MacroAssembler::float_bgt, -- NULL, // BoolTest::overflow -- (float_conditional_branch_insn)&MacroAssembler::float_blt, -- (float_conditional_branch_insn)&MacroAssembler::float_bne, -- (float_conditional_branch_insn)&MacroAssembler::float_ble, -- NULL, // BoolTest::no_overflow -- (float_conditional_branch_insn)&MacroAssembler::float_bge, -- -- /* DOUBLE SHORT branches */ -- (float_conditional_branch_insn)&MacroAssembler::double_beq, -- (float_conditional_branch_insn)&MacroAssembler::double_bgt, -- NULL, -- (float_conditional_branch_insn)&MacroAssembler::double_blt, -- (float_conditional_branch_insn)&MacroAssembler::double_bne, -- (float_conditional_branch_insn)&MacroAssembler::double_ble, -- NULL, -- (float_conditional_branch_insn)&MacroAssembler::double_bge --}; -- --void C2_MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { -- assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), -- "invalid conditional branch index"); -- (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); --} -- --// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use --// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). 
--void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { -- assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), -- "invalid float conditional branch index"); -- int booltest_flag = cmpFlag & ~(C2_MacroAssembler::double_branch_mask); -- (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, -- (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); --} -- --void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -- switch (cmpFlag) { -- case BoolTest::eq: -- case BoolTest::le: -- beqz(op1, L, is_far); -- break; -- case BoolTest::ne: -- case BoolTest::gt: -- bnez(op1, L, is_far); -- break; -- default: -- ShouldNotReachHere(); -- } --} -- --void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -- switch (cmpFlag) { -- case BoolTest::eq: -- beqz(op1, L, is_far); -- break; -- case BoolTest::ne: -- bnez(op1, L, is_far); -- break; -- default: -- ShouldNotReachHere(); -- } --} -- --void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { -- Label L; -- cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); -- mv(dst, src); -- bind(L); --} -- --// Set dst to NaN if any NaN input. --void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, -- bool is_double, bool is_min) { -- assert_different_registers(dst, src1, src2); -- -- Label Done; -- fsflags(zr); -- if (is_double) { -- is_min ? fmin_d(dst, src1, src2) -- : fmax_d(dst, src1, src2); -- // Checking NaNs -- flt_d(zr, src1, src2); -- } else { -- is_min ? fmin_s(dst, src1, src2) -- : fmax_s(dst, src1, src2); -- // Checking NaNs -- flt_s(zr, src1, src2); -- } -- -- frflags(t0); -- beqz(t0, Done); -- -- // In case of NaNs -- is_double ? fadd_d(dst, src1, src2) -- : fadd_s(dst, src1, src2); -- -- bind(Done); --} -diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp -deleted file mode 100644 -index 90b6554af02..00000000000 ---- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp -+++ /dev/null -@@ -1,141 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#ifndef CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP --#define CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP -- --// C2_MacroAssembler contains high-level macros for C2 -- -- public: -- -- void string_compare(Register str1, Register str2, -- Register cnt1, Register cnt2, Register result, -- Register tmp1, Register tmp2, Register tmp3, -- int ae); -- -- void string_indexof_char_short(Register str1, Register cnt1, -- Register ch, Register result, -- bool isL); -- -- void string_indexof_char(Register str1, Register cnt1, -- Register ch, Register result, -- Register tmp1, Register tmp2, -- Register tmp3, Register tmp4, -- bool isL); -- -- void string_indexof(Register str1, Register str2, -- Register cnt1, Register cnt2, -- Register tmp1, Register tmp2, -- Register tmp3, Register tmp4, -- Register tmp5, Register tmp6, -- Register result, int ae); -- -- void string_indexof_linearscan(Register haystack, Register needle, -- Register haystack_len, Register needle_len, -- Register tmp1, Register tmp2, -- Register tmp3, Register tmp4, -- int needle_con_cnt, Register result, int ae); -- -- void arrays_equals(Register r1, Register r2, -- Register tmp3, Register tmp4, -- Register tmp5, Register tmp6, -- Register result, Register cnt1, -- int elem_size); -- -- void string_equals(Register r1, Register r2, -- Register result, Register cnt1, -- int elem_size); -- -- // refer to conditional_branches and float_conditional_branches -- static const int bool_test_bits = 3; -- static const int neg_cond_bits = 2; -- static const int unsigned_branch_mask = 1 << bool_test_bits; -- static const int double_branch_mask = 1 << bool_test_bits; -- -- // cmp -- void cmp_branch(int cmpFlag, -- Register op1, Register op2, -- Label& label, bool is_far = false); -- -- void float_cmp_branch(int cmpFlag, -- FloatRegister op1, FloatRegister op2, -- Label& label, bool is_far = false); -- -- void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, -- Label& L, bool is_far = false); -- -- void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, -- Label& L, bool is_far = false); -- -- void enc_cmove(int cmpFlag, -- Register op1, Register op2, -- Register dst, Register src); -- -- void spill(Register r, bool is64, int offset) { -- is64 ? sd(r, Address(sp, offset)) -- : sw(r, Address(sp, offset)); -- } -- -- void spill(FloatRegister f, bool is64, int offset) { -- is64 ? fsd(f, Address(sp, offset)) -- : fsw(f, Address(sp, offset)); -- } -- -- void spill(VectorRegister v, int offset) { -- add(t0, sp, offset); -- vs1r_v(v, t0); -- } -- -- void unspill(Register r, bool is64, int offset) { -- is64 ? ld(r, Address(sp, offset)) -- : lw(r, Address(sp, offset)); -- } -- -- void unspillu(Register r, bool is64, int offset) { -- is64 ? ld(r, Address(sp, offset)) -- : lwu(r, Address(sp, offset)); -- } -- -- void unspill(FloatRegister f, bool is64, int offset) { -- is64 ? 
fld(f, Address(sp, offset)) -- : flw(f, Address(sp, offset)); -- } -- -- void unspill(VectorRegister v, int offset) { -- add(t0, sp, offset); -- vl1r_v(v, t0); -- } -- -- void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vec_reg_size_in_bytes) { -- assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); -- unspill(v0, src_offset); -- spill(v0, dst_offset); -- } -- -- void minmax_FD(FloatRegister dst, -- FloatRegister src1, FloatRegister src2, -- bool is_double, bool is_min); -- --#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 304b6f2b06c..d175a62aeeb 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -4125,3 +4125,1285 @@ void MacroAssembler::safepoint_ifence() { - ifence(); - } - -+#ifdef COMPILER2 -+// short string -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL) -+{ -+ Register ch1 = t0; -+ Register index = t1; -+ -+ BLOCK_COMMENT("string_indexof_char_short {"); -+ -+ Label LOOP, LOOP1, LOOP4, LOOP8; -+ Label MATCH, MATCH1, MATCH2, MATCH3, -+ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; -+ -+ mv(result, -1); -+ mv(index, zr); -+ -+ bind(LOOP); -+ addi(t0, index, 8); -+ ble(t0, cnt1, LOOP8); -+ addi(t0, index, 4); -+ ble(t0, cnt1, LOOP4); -+ j(LOOP1); -+ -+ bind(LOOP8); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); -+ beq(ch, ch1, MATCH4); -+ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); -+ beq(ch, ch1, MATCH5); -+ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); -+ beq(ch, ch1, MATCH6); -+ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); -+ beq(ch, ch1, MATCH7); -+ addi(index, index, 8); -+ addi(str1, str1, isL ? 8 : 16); -+ blt(index, cnt1, LOOP); -+ j(NOMATCH); -+ -+ bind(LOOP4); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ addi(index, index, 4); -+ addi(str1, str1, isL ? 4 : 8); -+ bge(index, cnt1, NOMATCH); -+ -+ bind(LOOP1); -+ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); -+ beq(ch, ch1, MATCH); -+ addi(index, index, 1); -+ addi(str1, str1, isL ? 
1 : 2); -+ blt(index, cnt1, LOOP1); -+ j(NOMATCH); -+ -+ bind(MATCH1); -+ addi(index, index, 1); -+ j(MATCH); -+ -+ bind(MATCH2); -+ addi(index, index, 2); -+ j(MATCH); -+ -+ bind(MATCH3); -+ addi(index, index, 3); -+ j(MATCH); -+ -+ bind(MATCH4); -+ addi(index, index, 4); -+ j(MATCH); -+ -+ bind(MATCH5); -+ addi(index, index, 5); -+ j(MATCH); -+ -+ bind(MATCH6); -+ addi(index, index, 6); -+ j(MATCH); -+ -+ bind(MATCH7); -+ addi(index, index, 7); -+ -+ bind(MATCH); -+ mv(result, index); -+ bind(NOMATCH); -+ BLOCK_COMMENT("} string_indexof_char_short"); -+} -+ -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void MacroAssembler::string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL) -+{ -+ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; -+ Register ch1 = t0; -+ Register orig_cnt = t1; -+ Register mask1 = tmp3; -+ Register mask2 = tmp2; -+ Register match_mask = tmp1; -+ Register trailing_char = tmp4; -+ Register unaligned_elems = tmp4; -+ -+ BLOCK_COMMENT("string_indexof_char {"); -+ beqz(cnt1, NOMATCH); -+ -+ addi(t0, cnt1, isL ? -32 : -16); -+ bgtz(t0, DO_LONG); -+ string_indexof_char_short(str1, cnt1, ch, result, isL); -+ j(DONE); -+ -+ bind(DO_LONG); -+ mv(orig_cnt, cnt1); -+ if (AvoidUnalignedAccesses) { -+ Label ALIGNED; -+ andi(unaligned_elems, str1, 0x7); -+ beqz(unaligned_elems, ALIGNED); -+ sub(unaligned_elems, unaligned_elems, 8); -+ neg(unaligned_elems, unaligned_elems); -+ if (!isL) { -+ srli(unaligned_elems, unaligned_elems, 1); -+ } -+ // do unaligned part per element -+ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); -+ bgez(result, DONE); -+ mv(orig_cnt, cnt1); -+ sub(cnt1, cnt1, unaligned_elems); -+ bind(ALIGNED); -+ } -+ -+ // duplicate ch -+ if (isL) { -+ slli(ch1, ch, 8); -+ orr(ch, ch1, ch); -+ } -+ slli(ch1, ch, 16); -+ orr(ch, ch1, ch); -+ slli(ch1, ch, 32); -+ orr(ch, ch1, ch); -+ -+ if (!isL) { -+ slli(cnt1, cnt1, 1); -+ } -+ -+ uint64_t mask0101 = UCONST64(0x0101010101010101); -+ uint64_t mask0001 = UCONST64(0x0001000100010001); -+ mv(mask1, isL ? mask0101 : mask0001); -+ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); -+ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); -+ mv(mask2, isL ? 
mask7f7f : mask7fff); -+ -+ bind(CH1_LOOP); -+ ld(ch1, Address(str1)); -+ addi(str1, str1, 8); -+ addi(cnt1, cnt1, -8); -+ compute_match_mask(ch1, ch, match_mask, mask1, mask2); -+ bnez(match_mask, HIT); -+ bgtz(cnt1, CH1_LOOP); -+ j(NOMATCH); -+ -+ bind(HIT); -+ ctzc_bit(trailing_char, match_mask, isL, ch1, result); -+ srli(trailing_char, trailing_char, 3); -+ addi(cnt1, cnt1, 8); -+ ble(cnt1, trailing_char, NOMATCH); -+ // match case -+ if (!isL) { -+ srli(cnt1, cnt1, 1); -+ srli(trailing_char, trailing_char, 1); -+ } -+ -+ sub(result, orig_cnt, cnt1); -+ add(result, result, trailing_char); -+ j(DONE); -+ -+ bind(NOMATCH); -+ mv(result, -1); -+ -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof_char"); -+} -+ -+typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); -+ -+// Search for needle in haystack and return index or -1 -+// x10: result -+// x11: haystack -+// x12: haystack_len -+// x13: needle -+// x14: needle_len -+void MacroAssembler::string_indexof(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae) -+{ -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); -+ -+ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; -+ -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register nlen_tmp = tmp1; // needle len tmp -+ Register hlen_tmp = tmp2; // haystack len tmp -+ Register result_tmp = tmp4; -+ -+ bool isLL = ae == StrIntrinsicNode::LL; -+ -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ -+ BLOCK_COMMENT("string_indexof {"); -+ -+ // Note, inline_string_indexOf() generates checks: -+ // if (pattern.count > src.count) return -1; -+ // if (pattern.count == 0) return 0; -+ -+ // We have two strings, a source string in haystack, haystack_len and a pattern string -+ // in needle, needle_len. Find the first occurence of pattern in source or return -1. -+ -+ // For larger pattern and source we use a simplified Boyer Moore algorithm. -+ // With a small pattern and source we use linear scan. -+ -+ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. -+ sub(result_tmp, haystack_len, needle_len); -+ // needle_len < 8, use linear scan -+ sub(t0, needle_len, 8); -+ bltz(t0, LINEARSEARCH); -+ // needle_len >= 256, use linear scan -+ sub(t0, needle_len, 256); -+ bgez(t0, LINEARSTUB); -+ // needle_len >= haystack_len/4, use linear scan -+ srli(t0, haystack_len, 2); -+ bge(needle_len, t0, LINEARSTUB); -+ -+ // Boyer-Moore-Horspool introduction: -+ // The Boyer Moore alogorithm is based on the description here:- -+ // -+ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm -+ // -+ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule -+ // and the 'Good Suffix' rule. 
-+ // -+ // These rules are essentially heuristics for how far we can shift the -+ // pattern along the search string. -+ // -+ // The implementation here uses the 'Bad Character' rule only because of the -+ // complexity of initialisation for the 'Good Suffix' rule. -+ // -+ // This is also known as the Boyer-Moore-Horspool algorithm: -+ // -+ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm -+ // -+ // #define ASIZE 256 -+ // -+ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { -+ // int i, j; -+ // unsigned c; -+ // unsigned char bc[ASIZE]; -+ // -+ // /* Preprocessing */ -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ // -+ // /* Searching */ -+ // j = 0; -+ // while (j <= n - m) { -+ // c = src[i+j]; -+ // if (pattern[m-1] == c) -+ // int k; -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // if (k < 0) return j; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 -+ // // LL case: (c< 256) always true. Remove branch -+ // j += bc[pattern[j+m-1]]; -+ // #endif -+ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF -+ // // UU case: need if (c < ASIZE) check. Skip by m if not. -+ // if (c < ASIZE) -+ // j += bc[pattern[j+m-1]]; -+ // else -+ // j += m -+ // #endif -+ // } -+ // return -1; -+ // } -+ -+ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result -+ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, -+ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; -+ -+ Register haystack_end = haystack_len; -+ Register skipch = tmp2; -+ -+ // pattern length is >=8, so, we can read at least 1 register for cases when -+ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for -+ // UL case. We'll re-read last character in inner pre-loop code to have -+ // single outer pre-loop load -+ const int firstStep = isLL ? 
7 : 3; -+ -+ const int ASIZE = 256; -+ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) -+ -+ sub(sp, sp, ASIZE); -+ -+ // init BC offset table with default value: needle_len -+ slli(t0, needle_len, 8); -+ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] -+ slli(tmp1, t0, 16); -+ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] -+ slli(tmp1, t0, 32); -+ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] -+ -+ mv(ch1, sp); // ch1 is t0 -+ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations -+ -+ bind(BM_INIT_LOOP); -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ for (int i = 0; i < 4; i++) { -+ sd(tmp5, Address(ch1, i * wordSize)); -+ } -+ add(ch1, ch1, 32); -+ sub(tmp6, tmp6, 4); -+ bgtz(tmp6, BM_INIT_LOOP); -+ -+ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern -+ Register orig_haystack = tmp5; -+ mv(orig_haystack, haystack); -+ // result_tmp = tmp4 -+ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); -+ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 -+ mv(tmp3, needle); -+ -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ bind(BCLOOP); -+ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); -+ add(tmp3, tmp3, needle_chr_size); -+ if (!needle_isL) { -+ // ae == StrIntrinsicNode::UU -+ mv(tmp6, ASIZE); -+ bgeu(ch1, tmp6, BCSKIP); -+ } -+ add(tmp4, sp, ch1); -+ sb(ch2, Address(tmp4)); // store skip offset to BC offset table -+ -+ bind(BCSKIP); -+ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 -+ bgtz(ch2, BCLOOP); -+ -+ // tmp6: pattern end, address after needle -+ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); -+ if (needle_isL == haystack_isL) { -+ // load last 8 bytes (8LL/4UU symbols) -+ ld(tmp6, Address(tmp6, -wordSize)); -+ } else { -+ // UL: from UTF-16(source) search Latin1(pattern) -+ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) -+ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d -+ // We'll have to wait until load completed, but it's still faster than per-character loads+checks -+ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a -+ slli(ch2, tmp6, XLEN - 24); -+ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b -+ slli(ch1, tmp6, XLEN - 16); -+ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c -+ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d -+ slli(ch2, ch2, 16); -+ orr(ch2, ch2, ch1); // 0x00000b0c -+ slli(result, tmp3, 48); // use result as temp register -+ orr(tmp6, tmp6, result); // 0x0a00000d -+ slli(result, ch2, 16); -+ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d -+ } -+ -+ // i = m - 1; -+ // skipch = j + i; -+ // if (skipch == pattern[m - 1] -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // else -+ // move j with bad char offset table -+ bind(BMLOOPSTR2); -+ // compare pattern to source string backward -+ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); -+ (this->*haystack_load_1chr)(skipch, Address(result), noreg); -+ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 -+ if (needle_isL == haystack_isL) { -+ // re-init tmp3. It's for free because it's executed in parallel with -+ // load above. 
Alternative is to initialize it before loop, but it'll -+ // affect performance on in-order systems with 2 or more ld/st pipelines -+ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] -+ } -+ if (!isLL) { // UU/UL case -+ slli(ch2, nlen_tmp, 1); // offsets in bytes -+ } -+ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char -+ add(result, haystack, isLL ? nlen_tmp : ch2); -+ ld(ch2, Address(result)); // load 8 bytes from source string -+ mv(ch1, tmp6); -+ if (isLL) { -+ j(BMLOOPSTR1_AFTER_LOAD); -+ } else { -+ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 -+ j(BMLOOPSTR1_CMP); -+ } -+ -+ bind(BMLOOPSTR1); -+ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ -+ bind(BMLOOPSTR1_AFTER_LOAD); -+ sub(nlen_tmp, nlen_tmp, 1); -+ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); -+ -+ bind(BMLOOPSTR1_CMP); -+ beq(ch1, ch2, BMLOOPSTR1); -+ -+ bind(BMSKIP); -+ if (!isLL) { -+ // if we've met UTF symbol while searching Latin1 pattern, then we can -+ // skip needle_len symbols -+ if (needle_isL != haystack_isL) { -+ mv(result_tmp, needle_len); -+ } else { -+ mv(result_tmp, 1); -+ } -+ mv(t0, ASIZE); -+ bgeu(skipch, t0, BMADV); -+ } -+ add(result_tmp, sp, skipch); -+ lbu(result_tmp, Address(result_tmp)); // load skip offset -+ -+ bind(BMADV); -+ sub(nlen_tmp, needle_len, 1); -+ // move haystack after bad char skip offset -+ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); -+ ble(haystack, haystack_end, BMLOOPSTR2); -+ add(sp, sp, ASIZE); -+ j(NOMATCH); -+ -+ bind(BMLOOPSTR1_LASTCMP); -+ bne(ch1, ch2, BMSKIP); -+ -+ bind(BMMATCH); -+ sub(result, haystack, orig_haystack); -+ if (!haystack_isL) { -+ srli(result, result, 1); -+ } -+ add(sp, sp, ASIZE); -+ j(DONE); -+ -+ bind(LINEARSTUB); -+ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm -+ bltz(t0, LINEARSEARCH); -+ mv(result, zr); -+ RuntimeAddress stub = NULL; -+ if (isLL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); -+ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); -+ } else if (needle_isL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); -+ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); -+ } else { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); -+ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); -+ } -+ trampoline_call(stub); -+ j(DONE); -+ -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); -+ -+ bind(LINEARSEARCH); -+ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); -+ -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof"); -+} -+ -+// string_indexof -+// result: x10 -+// src: x11 -+// src_count: x12 -+// pattern: x13 -+// pattern_count: x14 or 1/2/3/4 -+void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae) -+{ -+ // Note: -+ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant -+ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 -+ assert(needle_con_cnt <= 4, "Invalid 
needle constant count"); -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); -+ -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register hlen_neg = haystack_len, nlen_neg = needle_len; -+ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; -+ -+ bool isLL = ae == StrIntrinsicNode::LL; -+ -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; -+ -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; -+ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; -+ -+ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; -+ -+ Register first = tmp3; -+ -+ if (needle_con_cnt == -1) { -+ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; -+ -+ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); -+ bltz(t0, DOSHORT); -+ -+ (this->*needle_load_1chr)(first, Address(needle), noreg); -+ slli(t0, needle_len, needle_chr_shift); -+ add(needle, needle, t0); -+ neg(nlen_neg, t0); -+ slli(t0, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, t0); -+ neg(hlen_neg, t0); -+ -+ bind(FIRST_LOOP); -+ add(t0, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); -+ beq(first, ch2, STR1_LOOP); -+ -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); -+ -+ bind(STR1_LOOP); -+ add(nlen_tmp, nlen_neg, needle_chr_size); -+ add(hlen_tmp, hlen_neg, haystack_chr_size); -+ bgez(nlen_tmp, MATCH); -+ -+ bind(STR1_NEXT); -+ add(ch1, needle, nlen_tmp); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ add(nlen_tmp, nlen_tmp, needle_chr_size); -+ add(hlen_tmp, hlen_tmp, haystack_chr_size); -+ bltz(nlen_tmp, STR1_NEXT); -+ j(MATCH); -+ -+ bind(DOSHORT); -+ if (needle_isL == haystack_isL) { -+ sub(t0, needle_len, 2); -+ bltz(t0, DO1); -+ bgtz(t0, DO3); -+ } -+ } -+ -+ if (needle_con_cnt == 4) { -+ Label CH1_LOOP; -+ (this->*load_4chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, haystack_len, 4); -+ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); -+ -+ bind(CH1_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_4chr)(ch2, Address(ch2), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ } -+ -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { -+ Label CH1_LOOP; -+ BLOCK_COMMENT("string_indexof DO2 {"); -+ bind(DO2); -+ (this->*load_2chr)(ch1, Address(needle), noreg); -+ if (needle_con_cnt == 2) { -+ sub(result_tmp, haystack_len, 2); -+ } -+ slli(tmp3, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); -+ -+ bind(CH1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, 
MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ BLOCK_COMMENT("} string_indexof DO2"); -+ } -+ -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { -+ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; -+ BLOCK_COMMENT("string_indexof DO3 {"); -+ -+ bind(DO3); -+ (this->*load_2chr)(first, Address(needle), noreg); -+ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); -+ if (needle_con_cnt == 3) { -+ sub(result_tmp, haystack_len, 3); -+ } -+ slli(hlen_tmp, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, hlen_tmp); -+ neg(hlen_neg, hlen_tmp); -+ -+ bind(FIRST_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(ch2), noreg); -+ beq(first, ch2, STR1_LOOP); -+ -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); -+ -+ bind(STR1_LOOP); -+ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ j(MATCH); -+ BLOCK_COMMENT("} string_indexof DO3"); -+ } -+ -+ if (needle_con_cnt == -1 || needle_con_cnt == 1) { -+ Label DO1_LOOP; -+ -+ BLOCK_COMMENT("string_indexof DO1 {"); -+ bind(DO1); -+ (this->*needle_load_1chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, haystack_len, 1); -+ mv(tmp3, result_tmp); -+ if (haystack_chr_shift) { -+ slli(tmp3, result_tmp, haystack_chr_shift); -+ } -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); -+ -+ bind(DO1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, DO1_LOOP); -+ BLOCK_COMMENT("} string_indexof DO1"); -+ } -+ -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); -+ -+ bind(MATCH); -+ srai(t0, hlen_neg, haystack_chr_shift); -+ add(result, result_tmp, t0); -+ -+ bind(DONE); -+} -+ -+// Compare strings. -+void MacroAssembler::string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -+ Register tmp3, int ae) -+{ -+ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, -+ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, -+ SHORT_LOOP_START, TAIL_CHECK, L; -+ -+ const int STUB_THRESHOLD = 64 + 8; -+ bool isLL = ae == StrIntrinsicNode::LL; -+ bool isLU = ae == StrIntrinsicNode::LU; -+ bool isUL = ae == StrIntrinsicNode::UL; -+ -+ bool str1_isL = isLL || isLU; -+ bool str2_isL = isLL || isUL; -+ -+ // for L strings, 1 byte for 1 character -+ // for U strings, 2 bytes for 1 character -+ int str1_chr_size = str1_isL ? 1 : 2; -+ int str2_chr_size = str2_isL ? 1 : 2; -+ int minCharsInWord = isLL ? wordSize : wordSize / 2; -+ -+ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -+ -+ BLOCK_COMMENT("string_compare {"); -+ -+ // Bizzarely, the counts are passed in bytes, regardless of whether they -+ // are L or U strings, however the result is always in characters. -+ if (!str1_isL) { -+ sraiw(cnt1, cnt1, 1); -+ } -+ if (!str2_isL) { -+ sraiw(cnt2, cnt2, 1); -+ } -+ -+ // Compute the minimum of the string lengths and save the difference in result. 
-+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); -+ -+ // A very short string -+ li(t0, minCharsInWord); -+ ble(cnt2, t0, SHORT_STRING); -+ -+ // Compare longwords -+ // load first parts of strings and finish initialization while loading -+ { -+ if (str1_isL == str2_isL) { // LL or UU -+ // load 8 bytes once to compare -+ ld(tmp1, Address(str1)); -+ beq(str1, str2, DONE); -+ ld(tmp2, Address(str2)); -+ li(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ sub(cnt2, cnt2, minCharsInWord); -+ beqz(cnt2, TAIL_CHECK); -+ // convert cnt2 from characters to bytes -+ if (!str1_isL) { -+ slli(cnt2, cnt2, 1); -+ } -+ add(str2, str2, cnt2); -+ add(str1, str1, cnt2); -+ sub(cnt2, zr, cnt2); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ li(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ add(str1, str1, cnt2); -+ sub(cnt1, zr, cnt2); -+ slli(cnt2, cnt2, 1); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 4); -+ } else { // UL case -+ ld(tmp1, Address(str1)); -+ lwu(tmp2, Address(str2)); -+ li(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ slli(t0, cnt2, 1); -+ sub(cnt1, zr, t0); -+ add(str1, str1, t0); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 8); -+ } -+ addi(cnt2, cnt2, isUL ? 4 : 8); -+ bgez(cnt2, TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); -+ -+ // main loop -+ bind(NEXT_WORD); -+ if (str1_isL == str2_isL) { // LL or UU -+ add(t0, str1, cnt2); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt2, cnt2, 8); -+ } else if (isLU) { // LU case -+ add(t0, str1, cnt1); -+ lwu(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt1, cnt1, 4); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ addi(cnt2, cnt2, 8); -+ } else { // UL case -+ add(t0, str2, cnt2); -+ lwu(tmp2, Address(t0)); -+ add(t0, str1, cnt1); -+ ld(tmp1, Address(t0)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ addi(cnt1, cnt1, 8); -+ addi(cnt2, cnt2, 4); -+ } -+ bgez(cnt2, TAIL); -+ -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, NEXT_WORD); -+ j(DIFFERENCE); -+ bind(TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ if (str1_isL == str2_isL) { // LL or UU -+ ld(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ } else { // UL case -+ lwu(tmp2, Address(str2)); -+ ld(tmp1, Address(str1)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ } -+ bind(TAIL_CHECK); -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, DONE); -+ -+ // Find the first different characters in the longwords and -+ // compute their difference. 
-+ bind(DIFFERENCE); -+ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb -+ srl(tmp1, tmp1, result); -+ srl(tmp2, tmp2, result); -+ if (isLL) { -+ andi(tmp1, tmp1, 0xFF); -+ andi(tmp2, tmp2, 0xFF); -+ } else { -+ andi(tmp1, tmp1, 0xFFFF); -+ andi(tmp2, tmp2, 0xFFFF); -+ } -+ sub(result, tmp1, tmp2); -+ j(DONE); -+ } -+ -+ bind(STUB); -+ RuntimeAddress stub = NULL; -+ switch (ae) { -+ case StrIntrinsicNode::LL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); -+ break; -+ case StrIntrinsicNode::UU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); -+ break; -+ case StrIntrinsicNode::LU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); -+ break; -+ case StrIntrinsicNode::UL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); -+ trampoline_call(stub); -+ j(DONE); -+ -+ bind(SHORT_STRING); -+ // Is the minimum length zero? -+ beqz(cnt2, DONE); -+ // arrange code to do most branches while loading and loading next characters -+ // while comparing previous -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ j(SHORT_LOOP_START); -+ bind(SHORT_LOOP); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST); -+ bind(SHORT_LOOP_START); -+ (this->*str1_load_chr)(tmp2, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(t0, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bne(tmp1, cnt1, SHORT_LOOP_TAIL); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST2); -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ beq(tmp2, t0, SHORT_LOOP); -+ sub(result, tmp2, t0); -+ j(DONE); -+ bind(SHORT_LOOP_TAIL); -+ sub(result, tmp1, cnt1); -+ j(DONE); -+ bind(SHORT_LAST2); -+ beq(tmp2, t0, DONE); -+ sub(result, tmp2, t0); -+ -+ j(DONE); -+ bind(SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bind(SHORT_LAST); -+ beq(tmp1, cnt1, DONE); -+ sub(result, tmp1, cnt1); -+ -+ bind(DONE); -+ -+ BLOCK_COMMENT("} string_compare"); -+} -+ -+void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, -+ Register tmp4, Register tmp5, Register tmp6, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; // cnt2 only used in array length compare -+ Register elem_per_word = tmp6; -+ int log_elem_size = exact_log2(elem_size); -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); -+ -+ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); -+ li(elem_per_word, wordSize / elem_size); -+ -+ BLOCK_COMMENT("arrays_equals {"); -+ -+ // if (a1 == a2), return true -+ beq(a1, a2, SAME); -+ -+ mv(result, false); -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt2, cnt1, DONE); -+ beqz(cnt1, SAME); -+ -+ slli(tmp5, cnt1, 3 + log_elem_size); -+ sub(tmp5, zr, tmp5); -+ add(a1, a1, base_offset); -+ add(a2, a2, base_offset); -+ ld(tmp3, Address(a1, 0)); -+ ld(tmp4, Address(a2, 0)); -+ ble(cnt1, elem_per_word, SHORT); // short or same -+ -+ // Main 16 byte comparison loop with 2 exits -+ bind(NEXT_DWORD); { -+ ld(tmp1, Address(a1, wordSize)); -+ ld(tmp2, Address(a2, wordSize)); -+ sub(cnt1, cnt1, 2 * wordSize / elem_size); -+ blez(cnt1, TAIL); -+ bne(tmp3, tmp4, DONE); -+ ld(tmp3, Address(a1, 2 * wordSize)); -+ ld(tmp4, Address(a2, 2 * wordSize)); -+ add(a1, a1, 2 * wordSize); -+ add(a2, a2, 2 * wordSize); -+ ble(cnt1, elem_per_word, TAIL2); -+ } beq(tmp1, tmp2, NEXT_DWORD); -+ j(DONE); -+ -+ bind(TAIL); -+ xorr(tmp4, tmp3, tmp4); -+ xorr(tmp2, tmp1, tmp2); -+ sll(tmp2, tmp2, tmp5); -+ orr(tmp5, tmp4, tmp2); -+ j(IS_TMP5_ZR); -+ -+ bind(TAIL2); -+ bne(tmp1, tmp2, DONE); -+ -+ bind(SHORT); -+ xorr(tmp4, tmp3, tmp4); -+ sll(tmp5, tmp4, tmp5); -+ -+ bind(IS_TMP5_ZR); -+ bnez(tmp5, DONE); -+ -+ bind(SAME); -+ mv(result, true); -+ // That's it. -+ bind(DONE); -+ -+ BLOCK_COMMENT("} array_equals"); -+} -+ -+// Compare Strings -+ -+// For Strings we're passed the address of the first characters in a1 -+// and a2 and the length in cnt1. -+// elem_size is the element size in bytes: either 1 or 2. -+// There are two implementations. For arrays >= 8 bytes, all -+// comparisons (including the final one, which may overlap) are -+// performed 8 bytes at a time. For strings < 8 bytes, we compare a -+// halfword, then a short, and then a byte. -+ -+void MacroAssembler::string_equals(Register a1, Register a2, -+ Register result, Register cnt1, int elem_size) -+{ -+ Label SAME, DONE, SHORT, NEXT_WORD; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ -+ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1); -+ -+ BLOCK_COMMENT("string_equals {"); -+ -+ mv(result, false); -+ -+ // Check for short strings, i.e. smaller than wordSize. -+ sub(cnt1, cnt1, wordSize); -+ bltz(cnt1, SHORT); -+ -+ // Main 8 byte comparison loop. -+ bind(NEXT_WORD); { -+ ld(tmp1, Address(a1, 0)); -+ add(a1, a1, wordSize); -+ ld(tmp2, Address(a2, 0)); -+ add(a2, a2, wordSize); -+ sub(cnt1, cnt1, wordSize); -+ bne(tmp1, tmp2, DONE); -+ } bgtz(cnt1, NEXT_WORD); -+ -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when -+ // length == 4. -+ add(tmp1, a1, cnt1); -+ ld(tmp1, Address(tmp1, 0)); -+ add(tmp2, a2, cnt1); -+ ld(tmp2, Address(tmp2, 0)); -+ bne(tmp1, tmp2, DONE); -+ j(SAME); -+ -+ bind(SHORT); -+ Label TAIL03, TAIL01; -+ -+ // 0-7 bytes left. -+ andi(t0, cnt1, 4); -+ beqz(t0, TAIL03); -+ { -+ lwu(tmp1, Address(a1, 0)); -+ add(a1, a1, 4); -+ lwu(tmp2, Address(a2, 0)); -+ add(a2, a2, 4); -+ bne(tmp1, tmp2, DONE); -+ } -+ -+ bind(TAIL03); -+ // 0-3 bytes left. 
-+ andi(t0, cnt1, 2); -+ beqz(t0, TAIL01); -+ { -+ lhu(tmp1, Address(a1, 0)); -+ add(a1, a1, 2); -+ lhu(tmp2, Address(a2, 0)); -+ add(a2, a2, 2); -+ bne(tmp1, tmp2, DONE); -+ } -+ -+ bind(TAIL01); -+ if (elem_size == 1) { // Only needed when comparing 1-byte elements -+ // 0-1 bytes left. -+ andi(t0, cnt1, 1); -+ beqz(t0, SAME); -+ { -+ lbu(tmp1, a1, 0); -+ lbu(tmp2, a2, 0); -+ bne(tmp1, tmp2, DONE); -+ } -+ } -+ -+ // Arrays are equal. -+ bind(SAME); -+ mv(result, true); -+ -+ // That's it. -+ bind(DONE); -+ BLOCK_COMMENT("} string_equals"); -+} -+ -+typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); -+typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, -+ bool is_far, bool is_unordered); -+ -+static conditional_branch_insn conditional_branches[] = -+{ -+ /* SHORT branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgt, -+ NULL, // BoolTest::overflow -+ (conditional_branch_insn)&Assembler::blt, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::ble, -+ NULL, // BoolTest::no_overflow -+ (conditional_branch_insn)&Assembler::bge, -+ -+ /* UNSIGNED branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgtu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bltu, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::bleu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bgeu -+}; -+ -+static float_conditional_branch_insn float_conditional_branches[] = -+{ -+ /* FLOAT SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::float_beq, -+ (float_conditional_branch_insn)&MacroAssembler::float_bgt, -+ NULL, // BoolTest::overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_blt, -+ (float_conditional_branch_insn)&MacroAssembler::float_bne, -+ (float_conditional_branch_insn)&MacroAssembler::float_ble, -+ NULL, // BoolTest::no_overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_bge, -+ -+ /* DOUBLE SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::double_beq, -+ (float_conditional_branch_insn)&MacroAssembler::double_bgt, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_blt, -+ (float_conditional_branch_insn)&MacroAssembler::double_bne, -+ (float_conditional_branch_insn)&MacroAssembler::double_ble, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_bge -+}; -+ -+void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), -+ "invalid conditional branch index"); -+ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); -+} -+ -+// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use -+// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). 
-+void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), -+ "invalid float conditional branch index"); -+ int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); -+ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, -+ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); -+} -+ -+void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ case BoolTest::le: -+ beqz(op1, L, is_far); -+ break; -+ case BoolTest::ne: -+ case BoolTest::gt: -+ bnez(op1, L, is_far); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ beqz(op1, L, is_far); -+ break; -+ case BoolTest::ne: -+ bnez(op1, L, is_far); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { -+ Label L; -+ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); -+ mv(dst, src); -+ bind(L); -+} -+ -+// Set dst to NaN if any NaN input. -+void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, -+ bool is_double, bool is_min) { -+ assert_different_registers(dst, src1, src2); -+ -+ Label Done; -+ fsflags(zr); -+ if (is_double) { -+ is_min ? fmin_d(dst, src1, src2) -+ : fmax_d(dst, src1, src2); -+ // Checking NaNs -+ flt_d(zr, src1, src2); -+ } else { -+ is_min ? fmin_s(dst, src1, src2) -+ : fmax_s(dst, src1, src2); -+ // Checking NaNs -+ flt_s(zr, src1, src2); -+ } -+ -+ frflags(t0); -+ beqz(t0, Done); -+ -+ // In case of NaNs -+ is_double ? 
fadd_d(dst, src1, src2) -+ : fadd_s(dst, src1, src2); -+ -+ bind(Done); -+} -+ -+#endif // COMPILER2 -+ -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index c6b71bdbc3c..2ef28771e2e 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -851,6 +851,109 @@ class MacroAssembler: public Assembler { - void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); - void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); - -+public: -+ void string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, -+ Register tmp1, Register tmp2, Register tmp3, -+ int ae); -+ -+ void string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL); -+ -+ void string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL); -+ -+ void string_indexof(Register str1, Register str2, -+ Register cnt1, Register cnt2, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae); -+ -+ void string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae); -+ -+ void arrays_equals(Register r1, Register r2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, Register cnt1, -+ int elem_size); -+ -+ void string_equals(Register r1, Register r2, -+ Register result, Register cnt1, -+ int elem_size); -+ -+ // refer to conditional_branches and float_conditional_branches -+ static const int bool_test_bits = 3; -+ static const int neg_cond_bits = 2; -+ static const int unsigned_branch_mask = 1 << bool_test_bits; -+ static const int double_branch_mask = 1 << bool_test_bits; -+ -+ // cmp -+ void cmp_branch(int cmpFlag, -+ Register op1, Register op2, -+ Label& label, bool is_far = false); -+ -+ void float_cmp_branch(int cmpFlag, -+ FloatRegister op1, FloatRegister op2, -+ Label& label, bool is_far = false); -+ -+ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, -+ Label& L, bool is_far = false); -+ -+ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, -+ Label& L, bool is_far = false); -+ -+ void enc_cmove(int cmpFlag, -+ Register op1, Register op2, -+ Register dst, Register src); -+ -+ void spill(Register r, bool is64, int offset) { -+ is64 ? sd(r, Address(sp, offset)) -+ : sw(r, Address(sp, offset)); -+ } -+ -+ void spill(FloatRegister f, bool is64, int offset) { -+ is64 ? fsd(f, Address(sp, offset)) -+ : fsw(f, Address(sp, offset)); -+ } -+ -+ void spill(VectorRegister v, int offset) { -+ add(t0, sp, offset); -+ vs1r_v(v, t0); -+ } -+ -+ void unspill(Register r, bool is64, int offset) { -+ is64 ? ld(r, Address(sp, offset)) -+ : lw(r, Address(sp, offset)); -+ } -+ -+ void unspillu(Register r, bool is64, int offset) { -+ is64 ? ld(r, Address(sp, offset)) -+ : lwu(r, Address(sp, offset)); -+ } -+ -+ void unspill(FloatRegister f, bool is64, int offset) { -+ is64 ? 
fld(f, Address(sp, offset)) -+ : flw(f, Address(sp, offset)); -+ } -+ -+ void unspill(VectorRegister v, int offset) { -+ add(t0, sp, offset); -+ vl1r_v(v, t0); -+ } -+ -+ void minmax_FD(FloatRegister dst, -+ FloatRegister src1, FloatRegister src2, -+ bool is_double, bool is_min); -+ - }; - - #ifdef ASSERT -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 13546ab328b..2e7eed8fb52 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -997,7 +997,7 @@ void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - #endif - - void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); - __ ebreak(); - } -@@ -1015,7 +1015,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { - #endif - - void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. - for (int i = 0; i < _count; i++) { - __ nop(); -@@ -1074,7 +1074,7 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - assert_cond(ra_ != NULL); - Compile* C = ra_->C; -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - - // n.b. frame size includes space for return pc and fp - const int framesize = C->output()->frame_size_in_bytes(); -@@ -1150,7 +1150,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - assert_cond(ra_ != NULL); - Compile* C = ra_->C; -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - assert_cond(C != NULL); - int framesize = C->output()->frame_size_in_bytes(); - -@@ -1251,7 +1251,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo - int dst_offset = ra_->reg2offset(dst_lo); - - if (cbuf != NULL) { -- C2_MacroAssembler _masm(cbuf); -+ MacroAssembler _masm(cbuf); - Assembler::CompressibleRegion cr(&_masm); - switch (src_lo_rc) { - case rc_int: -@@ -1371,7 +1371,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - #endif - - void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - - assert_cond(ra_ != NULL); - int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); -@@ -1422,7 +1422,7 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const - void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const - { - // This is the unverified entry point. -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - - Label skip; - __ cmp_klass(j_rarg0, t1, t0, skip); -@@ -1449,7 +1449,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) - // j #exception_blob_entry_point - // Note that the code buffer's insts_mark is always relative to insts. - // That's why we must use the macroassembler to generate a handler. 
-- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - address base = __ start_a_stub(size_exception_handler()); - if (base == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); -@@ -1467,7 +1467,7 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) - { - // Note that the code buffer's insts_mark is always relative to insts. - // That's why we must use the macroassembler to generate a handler. -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - address base = __ start_a_stub(size_deopt_handler()); - if (base == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); -@@ -1848,7 +1848,7 @@ encode %{ - // BEGIN Non-volatile memory access - - enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); - int64_t con = (int64_t)$src$$constant; - Register dst_reg = as_Register($dst$$reg); -@@ -1856,7 +1856,7 @@ encode %{ - %} - - enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - address con = (address)$src$$constant; - if (con == NULL || con == (address)1) { -@@ -1875,7 +1875,7 @@ encode %{ - %} - - enc_class riscv_enc_mov_p1(iRegP dst) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); - Register dst_reg = as_Register($dst$$reg); - __ li(dst_reg, 1); -@@ -1893,12 +1893,12 @@ encode %{ - %} - - enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ load_byte_map_base($dst$$Register); - %} - - enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - address con = (address)$src$$constant; - if (con == NULL) { -@@ -1911,13 +1911,13 @@ encode %{ - %} - - enc_class riscv_enc_mov_zero(iRegNorP dst) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - __ mv(dst_reg, zr); - %} - - enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - address con = (address)$src$$constant; - if (con == NULL) { -@@ -1930,42 +1930,42 @@ encode %{ - %} - - enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, - /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - %} - - enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, - /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - %} - - enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, - /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - 
%} - - enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, - /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - %} - - enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, - /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - %} - - enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, - /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); -@@ -1974,13 +1974,13 @@ encode %{ - // compare and branch instruction encodings - - enc_class riscv_enc_j(label lbl) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Label* L = $lbl$$label; - __ j(*L); - %} - - enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Label* L = $lbl$$label; - switch ($cmp$$cmpcode) { - case(BoolTest::ge): -@@ -2004,7 +2004,7 @@ encode %{ - - Label miss; - Label done; -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, - NULL, &miss); - if ($primary) { -@@ -2023,7 +2023,7 @@ encode %{ - %} - - enc_class riscv_enc_java_static_call(method meth) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - - address addr = (address)$meth$$method; - address call = NULL; -@@ -2055,7 +2055,7 @@ encode %{ - %} - - enc_class riscv_enc_java_dynamic_call(method meth) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - int method_index = resolved_method_index(cbuf); - address call = __ ic_call((address)$meth$$method, method_index); - if (call == NULL) { -@@ -2065,7 +2065,7 @@ encode %{ - %} - - enc_class riscv_enc_call_epilog() %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - if (VerifyStackAtCalls) { - // Check that stack depth is unchanged: find majik cookie on stack - __ call_Unimplemented(); -@@ -2073,7 +2073,7 @@ encode %{ - %} - - enc_class riscv_enc_java_to_runtime(method meth) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - - // some calls to generated routines (arraycopy code) are scheduled - // by C2 as runtime calls. if so we can call them using a jr (they -@@ -2102,7 +2102,7 @@ encode %{ - - // using the cr register as the bool result: 0 for success; others failed. - enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register flag = t1; - Register oop = as_Register($object$$reg); - Register box = as_Register($box$$reg); -@@ -2189,7 +2189,7 @@ encode %{ - - // using cr flag to indicate the fast_unlock result: 0 for success; others failed. 
- enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register flag = t1; - Register oop = as_Register($object$$reg); - Register box = as_Register($box$$reg); -@@ -2262,7 +2262,7 @@ encode %{ - // arithmetic encodings - - enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - Register src1_reg = as_Register($src1$$reg); - Register src2_reg = as_Register($src2$$reg); -@@ -2270,7 +2270,7 @@ encode %{ - %} - - enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - Register src1_reg = as_Register($src1$$reg); - Register src2_reg = as_Register($src2$$reg); -@@ -2278,7 +2278,7 @@ encode %{ - %} - - enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - Register src1_reg = as_Register($src1$$reg); - Register src2_reg = as_Register($src2$$reg); -@@ -2286,7 +2286,7 @@ encode %{ - %} - - enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - Register src1_reg = as_Register($src1$$reg); - Register src2_reg = as_Register($src2$$reg); -@@ -2294,14 +2294,14 @@ encode %{ - %} - - enc_class riscv_enc_tail_call(iRegP jump_target) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); - Register target_reg = as_Register($jump_target$$reg); - __ jr(target_reg); - %} - - enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); - Register target_reg = as_Register($jump_target$$reg); - // exception oop should be in x10 -@@ -2312,12 +2312,12 @@ encode %{ - %} - - enc_class riscv_enc_rethrow() %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); - %} - - enc_class riscv_enc_ret() %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); - __ ret(); - %} -@@ -8506,7 +8506,7 @@ instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8526,7 +8526,7 @@ instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8585,7 +8585,7 @@ instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | 
MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8604,7 +8604,7 @@ instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8625,7 +8625,7 @@ instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8645,7 +8645,7 @@ instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8666,7 +8666,7 @@ instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8686,7 +8686,7 @@ instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8741,7 +8741,7 @@ instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%} - - ins_encode %{ -- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), - as_FloatRegister($op2$$reg), *($lbl$$label)); - %} - -@@ -8759,7 +8759,7 @@ instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%} - - ins_encode %{ -- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), - as_FloatRegister($op2$$reg), *($lbl$$label)); - %} - -@@ -9080,7 +9080,7 @@ instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9095,7 +9095,7 @@ instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ - format %{ "far_b$cmp $op1, $op2, 
$lbl\t#@far_cmpU_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9138,7 +9138,7 @@ instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9153,7 +9153,7 @@ instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9171,7 +9171,7 @@ instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9189,7 +9189,7 @@ instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9207,7 +9207,7 @@ instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9225,7 +9225,7 @@ instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9276,7 +9276,7 @@ instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%} - - ins_encode %{ -- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), - as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9292,7 +9292,7 @@ instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%} - - ins_encode %{ -- __ 
float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), - as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9616,7 +9616,7 @@ instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) - %} - - ins_encode %{ -- __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, - as_Register($op1$$reg), as_Register($op2$$reg), - as_Register($dst$$reg), as_Register($src$$reg)); - %} -@@ -9673,7 +9673,7 @@ instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) - %} - - ins_encode %{ -- __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, - as_Register($op1$$reg), as_Register($op2$$reg), - as_Register($dst$$reg), as_Register($src$$reg)); - %} -@@ -9691,7 +9691,7 @@ instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) - %} - - ins_encode %{ -- __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, - as_Register($op1$$reg), as_Register($op2$$reg), - as_Register($dst$$reg), as_Register($src$$reg)); - %} - -From 115cd21290080b157d0ca8b7080e66ebd814fbdb Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:15:18 +0800 -Subject: [PATCH 091/140] Revert JDK-8222297: IRT_ENTRY/IRT_LEAF etc are the - same as JRT && JDK-8263709: Cleanup THREAD/TRAPS/CHECK usage in JRT_ENTRY - routines - ---- - src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -index d93530d8564..776b0787238 100644 ---- a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -@@ -278,12 +278,12 @@ class SlowSignatureHandler - }; - - --JRT_ENTRY(address, -- InterpreterRuntime::slow_signature_handler(JavaThread* current, -+IRT_ENTRY(address, -+ InterpreterRuntime::slow_signature_handler(JavaThread* thread, - Method* method, - intptr_t* from, - intptr_t* to)) -- methodHandle m(current, (Method*)method); -+ methodHandle m(thread, (Method*)method); - assert(m->is_native(), "sanity check"); - - // handle arguments -@@ -292,4 +292,4 @@ JRT_ENTRY(address, - - // return result handler - return Interpreter::result_handler(m->result_type()); --JRT_END -+IRT_END - -From 6cbf43d5f095aef93ef0bf595f51019a03cc1989 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:20:06 +0800 -Subject: [PATCH 092/140] Revert JDK-8245289: Clean up offset code in - JavaClasses - ---- - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 18 +++++++++--------- - .../templateInterpreterGenerator_riscv.cpp | 2 +- - 2 files changed, 10 insertions(+), 10 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -index 4442b5991b1..e070b8096a6 100644 ---- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -53,7 +53,7 @@ void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_ - verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), - "MH argument is a Class"); - } -- __ ld(klass_reg, Address(klass_reg, 
java_lang_Class::klass_offset())); -+ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); - } - - #ifdef ASSERT -@@ -140,13 +140,13 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, - - // Load the invoker, as MH -> MH.form -> LF.vmentry - __ verify_oop(recv); -- __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2); -+ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); - __ verify_oop(method_temp); -- __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2); -+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); - __ verify_oop(method_temp); -- __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset())), temp2); -+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), temp2); - __ verify_oop(method_temp); -- __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), noreg, noreg); -+ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); - - if (VerifyMethodHandles && !for_compiler_entry) { - // make sure recv is already on stack -@@ -284,10 +284,10 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, - "MemberName required for invokeVirtual etc."); - } - -- Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); -- Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); -- Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); -- Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); -+ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); -+ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); -+ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); -+ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); - - Register temp1_recv_klass = temp1; - if (iid != vmIntrinsics::_linkToStatic) { -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index 8aea4eca048..ce6166030b4 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -894,7 +894,7 @@ address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { - - address entry = __ pc(); - -- const int referent_offset = java_lang_ref_Reference::referent_offset(); -+ const int referent_offset = java_lang_ref_Reference::referent_offset; - guarantee(referent_offset > 0, "referent offset not initialized"); - - Label slow_path; - -From 8c9b9f4246f4ede3c31f59749f9d4bc625f106b3 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:30:35 +0800 -Subject: [PATCH 093/140] Revert JDK-8242629: Remove references to deprecated - java.util.Observer and Observable - ---- - 
.../runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java | 2 -- - .../classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java | 2 -- - .../sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java | 2 -- - 3 files changed, 6 deletions(-) - -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java -index f2e224f28ee..5c2b6e0e3ea 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java -@@ -34,8 +34,6 @@ - import sun.jvm.hotspot.runtime.riscv64.*; - import sun.jvm.hotspot.types.*; - import sun.jvm.hotspot.utilities.*; --import sun.jvm.hotspot.utilities.Observable; --import sun.jvm.hotspot.utilities.Observer; - - public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { - private static AddressField lastJavaFPField; -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java -index df280005d72..e372bc5f7be 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java -@@ -34,8 +34,6 @@ - import sun.jvm.hotspot.runtime.*; - import sun.jvm.hotspot.types.*; - import sun.jvm.hotspot.utilities.*; --import sun.jvm.hotspot.utilities.Observable; --import sun.jvm.hotspot.utilities.Observer; - - /** Specialization of and implementation of abstract methods of the - Frame class for the riscv64 family of CPUs. 
*/ -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -index d0ad2b559a6..850758a7ed4 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -@@ -31,8 +31,6 @@ - import sun.jvm.hotspot.types.*; - import sun.jvm.hotspot.runtime.*; - import sun.jvm.hotspot.utilities.*; --import sun.jvm.hotspot.utilities.Observable; --import sun.jvm.hotspot.utilities.Observer; - - public class RISCV64JavaCallWrapper extends JavaCallWrapper { - private static AddressField lastJavaFPField; - -From 43f2a4fec6b4922fa8c187deda310ad636aeed2e Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:33:56 +0800 -Subject: [PATCH 094/140] Revert JDK-8256155: Allow multiple large page sizes - to be used on Linux - ---- - src/hotspot/os/linux/os_linux.cpp | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp -index 6f75e623a9a..7fc9588301b 100644 ---- a/src/hotspot/os/linux/os_linux.cpp -+++ b/src/hotspot/os/linux/os_linux.cpp -@@ -4078,7 +4078,8 @@ size_t os::Linux::find_large_page_size() { - IA64_ONLY(256 * M) - PPC_ONLY(4 * M) - S390_ONLY(1 * M) -- SPARC_ONLY(4 * M); -+ SPARC_ONLY(4 * M) -+ RISCV64_ONLY(2 * M); - #endif // ZERO - - FILE *fp = fopen("/proc/meminfo", "r"); - -From a93191be0155882a0f4d92bba4de9fdf4f508a4a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:38:53 +0800 -Subject: [PATCH 095/140] Revert JDK-8252204: AArch64: Implement SHA3 - accelerator/intrinsic - ---- - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 5 ----- - 1 file changed, 5 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -index d4b79162d84..50ee7edb708 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -82,11 +82,6 @@ void VM_Version::initialize() { - FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); - } - -- if (UseSHA3Intrinsics) { -- warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); -- FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); -- } -- - if (UseCRC32Intrinsics) { - warning("CRC32 intrinsics are not available on this CPU."); - FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); - -From 29acd4f1bb99e856418f7d9d3da4f205812b1663 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:43:23 +0800 -Subject: [PATCH 096/140] Revert JDK-8253717: Relocate stack overflow code out - of thread.hpp/cpp && JDK-8255766: Fix linux+arm64 build after 8254072 - ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 4 ++-- - src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp | 2 +- - 3 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index d175a62aeeb..d94074b4a3c 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1666,7 +1666,7 @@ void MacroAssembler::bang_stack_size(Register size, Register tmp) { - // was post-decremented.) 
Skip this address by starting at i=1, and - // touch a few more pages below. N.B. It is important to touch all - // the way down to and including i=StackShadowPages. -- for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { -+ for (int i = 0; i < (int)(JavaThread::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { - // this could be any sized move but this is can be a debugging crumb - // so the bigger the better. - sub(tmp, tmp, os::vm_page_size()); -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index ae414224c5b..dc3ac548d73 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1252,7 +1252,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - __ nop(); - - // Generate stack overflow check -- __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size())); -+ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); - - // Generate a new frame for the wrapper. - __ enter(); -@@ -1551,7 +1551,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - Label reguard; - Label reguard_done; - __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); -- __ mv(t1, StackOverflow::stack_guard_yellow_reserved_disabled); -+ __ mv(t1, JavaThread::stack_guard_yellow_reserved_disabled); - __ beq(t0, t1, reguard); - __ bind(reguard_done); - -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index ce6166030b4..e639fa7e12f 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -1248,7 +1248,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { - { - Label no_reguard; - __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); -- __ addi(t1, zr, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); -+ __ addi(t1, zr, (u1)JavaThread::stack_guard_yellow_reserved_disabled); - __ bne(t0, t1, no_reguard); - - __ pusha(); // only save smashed registers - -From 6fa17c662dd2488108809e77dcff921bb475813c Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:50:51 +0800 -Subject: [PATCH 097/140] Revert JDK-8258459: Decouple gc_globals.hpp from - globals.hpp - ---- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index 1f4409a9c9a..84b1afc7dc6 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -28,7 +28,6 @@ - #include "asm/macroAssembler.inline.hpp" - #include "gc/shared/barrierSetAssembler.hpp" - #include "gc/shared/collectedHeap.hpp" --#include "gc/shared/tlab_globals.hpp" - #include "interpreter/interp_masm.hpp" - #include "interpreter/interpreter.hpp" - #include "interpreter/interpreterRuntime.hpp" - -From bcc26e749ccc20db5a4ba51c2cf8740a908a8a74 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:56:58 +0800 -Subject: [PATCH 098/140] Revert JDK-8223136: Move compressed oops functions to - CompressedOops class - ---- - .../cpu/riscv/macroAssembler_riscv.cpp | 64 +++++++++---------- - .../cpu/riscv/macroAssembler_riscv.hpp | 1 - - src/hotspot/cpu/riscv/riscv.ad | 10 +-- - 3 
files changed, 37 insertions(+), 38 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index d94074b4a3c..becc1656358 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1318,10 +1318,10 @@ int MacroAssembler::patch_oop(address insn_addr, address o) { - void MacroAssembler::reinit_heapbase() { - if (UseCompressedOops) { - if (Universe::is_fully_initialized()) { -- mv(xheapbase, CompressedOops::ptrs_base()); -+ mv(xheapbase, Universe::narrow_ptrs_base()); - } else { - int32_t offset = 0; -- la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset); -+ la_patchable(xheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()), offset); - ld(xheapbase, Address(xheapbase, offset)); - } - } -@@ -1596,8 +1596,8 @@ void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, R - void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { - if (UseCompressedClassPointers) { - lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); -- if (CompressedKlassPointers::base() == NULL) { -- slli(tmp, tmp, CompressedKlassPointers::shift()); -+ if (Universe::narrow_klass_base() == NULL) { -+ slli(tmp, tmp, Universe::narrow_klass_shift()); - beq(trial_klass, tmp, L); - return; - } -@@ -1745,9 +1745,9 @@ void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, - // Algorithm must match CompressedOops::encode. - void MacroAssembler::encode_heap_oop(Register d, Register s) { - verify_oop(s, "broken oop in encode_heap_oop"); -- if (CompressedOops::base() == NULL) { -- if (CompressedOops::shift() != 0) { -- assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); -+ if (Universe::narrow_oop_base() == NULL) { -+ if (Universe::narrow_oop_shift() != 0) { -+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - srli(d, s, LogMinObjAlignmentInBytes); - } else { - mv(d, s); -@@ -1758,9 +1758,9 @@ void MacroAssembler::encode_heap_oop(Register d, Register s) { - bgez(d, notNull); - mv(d, zr); - bind(notNull); -- if (CompressedOops::shift() != 0) { -- assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); -- srli(d, d, CompressedOops::shift()); -+ if (Universe::narrow_oop_shift() != 0) { -+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); -+ srli(d, d, Universe::narrow_oop_shift()); - } - } - } -@@ -1799,9 +1799,9 @@ void MacroAssembler::decode_klass_not_null(Register r) { - void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { - assert(UseCompressedClassPointers, "should only be used for compressed headers"); - -- if (CompressedKlassPointers::base() == NULL) { -- if (CompressedKlassPointers::shift() != 0) { -- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ if (Universe::narrow_klass_base() == NULL) { -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - slli(dst, src, LogKlassAlignmentInBytes); - } else { - mv(dst, src); -@@ -1815,10 +1815,10 @@ void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register - } - - assert_different_registers(src, xbase); -- li(xbase, (uintptr_t)CompressedKlassPointers::base()); -+ li(xbase, 
(uintptr_t)Universe::narrow_klass_base()); - -- if (CompressedKlassPointers::shift() != 0) { -- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - assert_different_registers(t0, xbase); - shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); - } else { -@@ -1835,9 +1835,9 @@ void MacroAssembler::encode_klass_not_null(Register r) { - void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { - assert(UseCompressedClassPointers, "should only be used for compressed headers"); - -- if (CompressedKlassPointers::base() == NULL) { -- if (CompressedKlassPointers::shift() != 0) { -- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ if (Universe::narrow_klass_base() == NULL) { -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - srli(dst, src, LogKlassAlignmentInBytes); - } else { - mv(dst, src); -@@ -1845,8 +1845,8 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register - return; - } - -- if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && -- CompressedKlassPointers::shift() == 0) { -+ if (((uint64_t)(uintptr_t)Universe::narrow_klass_base() & 0xffffffff) == 0 && -+ Universe::narrow_klass_shift() == 0) { - zero_extend(dst, src, 32); - return; - } -@@ -1857,10 +1857,10 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register - } - - assert_different_registers(src, xbase); -- li(xbase, (intptr_t)CompressedKlassPointers::base()); -+ li(xbase, (intptr_t)Universe::narrow_klass_base()); - sub(dst, src, xbase); -- if (CompressedKlassPointers::shift() != 0) { -- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - srli(dst, dst, LogKlassAlignmentInBytes); - } - if (xbase == xheapbase) { -@@ -1878,22 +1878,22 @@ void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { - // Cannot assert, unverified entry point counts instructions (see .ad file) - // vtableStubs also counts instructions in pd_code_size_limit. - // Also do not verify_oop as this is called by verify_oop. 
-- if (CompressedOops::shift() != 0) { -- assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); -+ if (Universe::narrow_oop_shift() != 0) { -+ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - slli(dst, src, LogMinObjAlignmentInBytes); -- if (CompressedOops::base() != NULL) { -+ if (Universe::narrow_oop_base() != NULL) { - add(dst, xheapbase, dst); - } - } else { -- assert(CompressedOops::base() == NULL, "sanity"); -+ assert(Universe::narrow_oop_base() == NULL, "sanity"); - mv(dst, src); - } - } - - void MacroAssembler::decode_heap_oop(Register d, Register s) { -- if (CompressedOops::base() == NULL) { -- if (CompressedOops::shift() != 0 || d != s) { -- slli(d, s, CompressedOops::shift()); -+ if (Universe::narrow_oop_base() == NULL) { -+ if (Universe::narrow_oop_shift() != 0 || d != s) { -+ slli(d, s, Universe::narrow_oop_shift()); - } - } else { - Label done; -@@ -3004,7 +3004,7 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { - InstructionMark im(this); - RelocationHolder rspec = metadata_Relocation::spec(index); - code_section()->relocate(inst_mark(), rspec); -- narrowKlass nk = CompressedKlassPointers::encode(k); -+ narrowKlass nk = Klass::encode_klass(k); - li32(dst, nk); - zero_extend(dst, dst, 32); - } -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 2ef28771e2e..953bca3cbd8 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -29,7 +29,6 @@ - - #include "asm/assembler.hpp" - #include "metaprogramming/enableIf.hpp" --#include "oops/compressedOops.hpp" - - // MacroAssembler extends Assembler by frequently used macros. - // -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 2e7eed8fb52..24214964243 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1407,7 +1407,7 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const - st->print_cr("# MachUEPNode"); - if (UseCompressedClassPointers) { - st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); -- if (CompressedKlassPointers::shift() != 0) { -+ if (Universe::narrow_klass_shift() != 0) { - st->print_cr("\tdecode_klass_not_null t0, t0"); - } - } else { -@@ -3255,7 +3255,7 @@ operand indOffL(iRegP reg, immLOffset off) - - operand indirectN(iRegN reg) - %{ -- predicate(CompressedOops::shift() == 0); -+ predicate(Universe::narrow_oop_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(DecodeN reg); - op_cost(0); -@@ -3270,7 +3270,7 @@ operand indirectN(iRegN reg) - - operand indOffIN(iRegN reg, immIOffset off) - %{ -- predicate(CompressedOops::shift() == 0); -+ predicate(Universe::narrow_oop_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (DecodeN reg) off); - op_cost(0); -@@ -3285,7 +3285,7 @@ operand indOffIN(iRegN reg, immIOffset off) - - operand indOffLN(iRegN reg, immLOffset off) - %{ -- predicate(CompressedOops::shift() == 0); -+ predicate(Universe::narrow_oop_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (DecodeN reg) off); - op_cost(0); -@@ -7947,7 +7947,7 @@ instruct convP2I(iRegINoSp dst, iRegP src) %{ - // in case of 32bit oops (heap < 4Gb). 
- instruct convN2I(iRegINoSp dst, iRegN src) - %{ -- predicate(CompressedOops::shift() == 0); -+ predicate(Universe::narrow_oop_shift() == 0); - match(Set dst (ConvL2I (CastP2X (DecodeN src)))); - - ins_cost(ALU_COST); - -From 81d8ea9077484f1dd20033390cbd3c1844b1b966 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 17:11:20 +0800 -Subject: [PATCH 099/140] Revert JDK-8247912: Make narrowOop a scoped enum - ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index becc1656358..e2841c28c37 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1305,7 +1305,7 @@ int MacroAssembler::patch_oop(address insn_addr, address o) { - // instruction. - if (NativeInstruction::is_li32_at(insn_addr)) { - // Move narrow OOP -- uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); -+ narrowOop n = CompressedOops::encode((oop)o); - return patch_imm_in_li32(insn_addr, (int32_t)n); - } else if (NativeInstruction::is_movptr_at(insn_addr)) { - // Move wide OOP - -From f980e03cb17804ff72958dd13505058048c04da8 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 17:20:05 +0800 -Subject: [PATCH 100/140] Revert JDK-8260467: Move well-known classes from - systemDictionary.hpp to vmClasses.hpp - ---- - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 11 +++++------ - src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 4 ++-- - 2 files changed, 7 insertions(+), 8 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -index e070b8096a6..fd907f77afb 100644 ---- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -27,7 +27,6 @@ - #include "precompiled.hpp" - #include "asm/macroAssembler.hpp" - #include "classfile/javaClasses.inline.hpp" --#include "classfile/vmClasses.hpp" - #include "interpreter/interpreter.hpp" - #include "interpreter/interpreterRuntime.hpp" - #include "memory/allocation.inline.hpp" -@@ -50,7 +49,7 @@ - void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { - assert_cond(_masm != NULL); - if (VerifyMethodHandles) { -- verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), -+ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), - "MH argument is a Class"); - } - __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); -@@ -68,11 +67,11 @@ static int check_nonzero(const char* xname, int x) { - - #ifdef ASSERT - void MethodHandles::verify_klass(MacroAssembler* _masm, -- Register obj, vmClassID klass_id, -+ Register obj, SystemDictionary::WKID klass_id, - const char* error_message) { - assert_cond(_masm != NULL); -- InstanceKlass** klass_addr = vmClasses::klass_addr_at(klass_id); -- Klass* klass = vmClasses::klass_at(klass_id); -+ InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); -+ Klass* klass = SystemDictionary::well_known_klass(klass_id); - Register temp = t1; - Register temp2 = t0; // used by MacroAssembler::cmpptr - Label L_ok, L_bad; -@@ -280,7 +279,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, - // The method is a member invoker used by direct method handles. 
- if (VerifyMethodHandles) { - // make sure the trailing argument really is a MemberName (caller responsibility) -- verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName), -+ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), - "MemberName required for invokeVirtual etc."); - } - -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -index f73aba29d67..65493eba764 100644 ---- a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -@@ -36,11 +36,11 @@ enum /* platform_dependent_constants */ { - static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); - - static void verify_klass(MacroAssembler* _masm, -- Register obj, vmClassID klass_id, -+ Register obj, SystemDictionary::WKID klass_id, - const char* error_message = "wrong klass") NOT_DEBUG_RETURN; - - static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { -- verify_klass(_masm, mh_reg, VM_CLASS_ID(java_lang_invoke_MethodHandle), -+ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), - "reference is a MH"); - } - - -From 2c68b064100b5abaca80926e213280ea82ff161a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 17:32:15 +0800 -Subject: [PATCH 101/140] Revert JDK-8268858: Determine register pressure - automatically by the number of available registers for allocation - ---- - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 ++ - src/hotspot/cpu/riscv/riscv.ad | 27 ---------------------- - 2 files changed, 2 insertions(+), 27 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -index 6c301cdae04..33d78fb2f6f 100644 ---- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -44,8 +44,10 @@ define_pd_global(intx, CompileThreshold, 10000); - - define_pd_global(intx, OnStackReplacePercentage, 140); - define_pd_global(intx, ConditionalMoveLimit, 0); -+define_pd_global(intx, FLOATPRESSURE, 32); - define_pd_global(intx, FreqInlineSize, 325); - define_pd_global(intx, MinJumpTableSize, 10); -+define_pd_global(intx, INTPRESSURE, 24); - define_pd_global(intx, InteriorEntryAlignment, 16); - define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); - define_pd_global(intx, LoopUnrollLimit, 60); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 24214964243..c5e0ae23029 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1719,33 +1719,6 @@ bool Matcher::is_spillable_arg(int reg) - return can_be_java_arg(reg); - } - --uint Matcher::int_pressure_limit() --{ -- // A derived pointer is live at CallNode and then is flagged by RA -- // as a spilled LRG. Spilling heuristics(Spill-USE) explicitly skip -- // derived pointers and lastly fail to spill after reaching maximum -- // number of iterations. Lowering the default pressure threshold to -- // (_NO_SPECIAL_REG32_mask.Size() minus 1) forces CallNode to become -- // a high register pressure area of the code so that split_DEF can -- // generate DefinitionSpillCopy for the derived pointer. 
-- uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.Size() - 1; -- if (!PreserveFramePointer) { -- // When PreserveFramePointer is off, frame pointer is allocatable, -- // but different from other SOC registers, it is excluded from -- // fatproj's mask because its save type is No-Save. Decrease 1 to -- // ensure high pressure at fatproj when PreserveFramePointer is off. -- // See check_pressure_at_fatproj(). -- default_int_pressure_threshold--; -- } -- return (INTPRESSURE == -1) ? default_int_pressure_threshold : INTPRESSURE; --} -- --uint Matcher::float_pressure_limit() --{ -- // _FLOAT_REG_mask is generated by adlc from the float_reg register class. -- return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.Size() : FLOATPRESSURE; --} -- - bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { - return false; - } - -From 932ebd6238ea7703dc3164e4506af332f6847592 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 17:51:12 +0800 -Subject: [PATCH 102/140] Revert JDK-8276563: Undefined Behaviour in class - Assembler && 8257882: Implement linkToNative intrinsic on AArch64 (the - register part) - ---- - .../cpu/riscv/globalDefinitions_riscv.hpp | 2 - - src/hotspot/cpu/riscv/register_riscv.cpp | 4 - - src/hotspot/cpu/riscv/register_riscv.hpp | 123 +++++++++++++----- - 3 files changed, 91 insertions(+), 38 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -index 2936837d951..ffd420da024 100644 ---- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -47,6 +47,4 @@ const bool CCallingConventionRequiresIntsAsLongs = false; - - #define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false - --#define USE_POINTERS_TO_REGISTER_IMPL_ARRAY -- - #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp -index 96cf1996a83..ef60cb3bb05 100644 ---- a/src/hotspot/cpu/riscv/register_riscv.cpp -+++ b/src/hotspot/cpu/riscv/register_riscv.cpp -@@ -26,10 +26,6 @@ - #include "precompiled.hpp" - #include "register_riscv.hpp" - --REGISTER_IMPL_DEFINITION(Register, RegisterImpl, RegisterImpl::number_of_registers); --REGISTER_IMPL_DEFINITION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); --REGISTER_IMPL_DEFINITION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); -- - const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * - RegisterImpl::max_slots_per_register; - -diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp -index d697751f55f..f64a06eb89f 100644 ---- a/src/hotspot/cpu/riscv/register_riscv.hpp -+++ b/src/hotspot/cpu/riscv/register_riscv.hpp -@@ -47,13 +47,13 @@ typedef VMRegImpl* VMReg; - - // Use Register as shortcut - class RegisterImpl; --typedef const RegisterImpl* Register; -+typedef RegisterImpl* Register; - --inline constexpr Register as_Register(int encoding); -+inline Register as_Register(int encoding) { -+ return (Register)(intptr_t) encoding; -+} - - class RegisterImpl: public AbstractRegisterImpl { -- static constexpr Register first(); -- - public: - enum { - number_of_registers = 32, -@@ -66,16 +66,16 @@ class RegisterImpl: public AbstractRegisterImpl { - }; - - // derived registers, offsets, and addresses -- const Register successor() const { return this + 1; } -+ const Register successor() const { return 
as_Register(encoding() + 1); } - - // construction -- inline friend constexpr Register as_Register(int encoding); -+ inline friend Register as_Register(int encoding); - - VMReg as_VMReg() const; - - // accessors - int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } -- int encoding_nocheck() const { return this - first(); } -+ int encoding_nocheck() const { return (intptr_t)this; } - bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } - const char* name() const; - -@@ -93,9 +93,11 @@ class RegisterImpl: public AbstractRegisterImpl { - return encoding_nocheck() >= compressed_register_base && - encoding_nocheck() <= compressed_register_top; - } --}; - --REGISTER_IMPL_DECLARATION(Register, RegisterImpl, RegisterImpl::number_of_registers); -+ // Return the bit which represents this register. This is intended -+ // to be ORed into a bitmask: for usage see class RegSet below. -+ uint64_t bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } -+}; - - // The integer registers of the RISCV architecture - -@@ -136,14 +138,14 @@ CONSTANT_REGISTER_DECLARATION(Register, x31, (31)); - - // Use FloatRegister as shortcut - class FloatRegisterImpl; --typedef const FloatRegisterImpl* FloatRegister; -+typedef FloatRegisterImpl* FloatRegister; - --inline constexpr FloatRegister as_FloatRegister(int encoding); -+inline FloatRegister as_FloatRegister(int encoding) { -+ return (FloatRegister)(intptr_t) encoding; -+} - - // The implementation of floating point registers for the architecture - class FloatRegisterImpl: public AbstractRegisterImpl { -- static constexpr FloatRegister first(); -- - public: - enum { - number_of_registers = 32, -@@ -155,18 +157,16 @@ class FloatRegisterImpl: public AbstractRegisterImpl { - }; - - // construction -- inline friend constexpr FloatRegister as_FloatRegister(int encoding); -+ inline friend FloatRegister as_FloatRegister(int encoding); - - VMReg as_VMReg() const; - - // derived registers, offsets, and addresses -- FloatRegister successor() const { -- return as_FloatRegister((encoding() + 1) % (unsigned)number_of_registers); -- } -+ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } - - // accessors - int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } -- int encoding_nocheck() const { return this - first(); } -+ int encoding_nocheck() const { return (intptr_t)this; } - int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } - const char* name() const; - -@@ -186,8 +186,6 @@ class FloatRegisterImpl: public AbstractRegisterImpl { - } - }; - --REGISTER_IMPL_DECLARATION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); -- - // The float registers of the RISCV architecture - - CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); -@@ -227,14 +225,14 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); - - // Use VectorRegister as shortcut - class VectorRegisterImpl; --typedef const VectorRegisterImpl* VectorRegister; -+typedef VectorRegisterImpl* VectorRegister; - --inline constexpr VectorRegister as_VectorRegister(int encoding); -+inline VectorRegister as_VectorRegister(int encoding) { -+ return (VectorRegister)(intptr_t) encoding; -+} - - // The implementation of vector registers for RVV - class VectorRegisterImpl: public AbstractRegisterImpl { -- static constexpr VectorRegister first(); -- - public: - enum { - number_of_registers = 32, -@@ -242,23 
+240,21 @@ class VectorRegisterImpl: public AbstractRegisterImpl { - }; - - // construction -- inline friend constexpr VectorRegister as_VectorRegister(int encoding); -+ inline friend VectorRegister as_VectorRegister(int encoding); - - VMReg as_VMReg() const; - - // derived registers, offsets, and addresses -- VectorRegister successor() const { return this + 1; } -+ VectorRegister successor() const { return as_VectorRegister(encoding() + 1); } - - // accessors - int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } -- int encoding_nocheck() const { return this - first(); } -+ int encoding_nocheck() const { return (intptr_t)this; } - bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } - const char* name() const; - - }; - --REGISTER_IMPL_DECLARATION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); -- - // The vector registers of RVV - CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1)); - -@@ -315,8 +311,71 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { - static const int max_fpr; - }; - --typedef AbstractRegSet RegSet; --typedef AbstractRegSet FloatRegSet; --typedef AbstractRegSet VectorRegSet; -+// A set of registers -+class RegSet { -+ uint32_t _bitset; ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } + -+ RegSet(uint32_t bitset) : _bitset(bitset) { } ++ // ++ // NOTE: some of this code is (unfortunately) duplicated RISCV64CurrentFrameGuess ++ // + -+public: ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } + -+ RegSet() : _bitset(0) { } ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + -+ RegSet(Register r1) : _bitset(r1->bit()) { } ++ // The return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + -+ RegSet operator+(const RegSet aSet) const { -+ RegSet result(_bitset | aSet._bitset); -+ return result; ++ // This is the saved value of FP which may or may not really be an FP. ++ // It is only an FP if the sender is an interpreter frame. ++ Address savedFPAddr = senderSP.addOffsetTo(-2 * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of FP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. 
++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new RISCV64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); + } + -+ RegSet operator-(const RegSet aSet) const { -+ RegSet result(_bitset & ~aSet._bitset); -+ return result; ++ protected boolean hasSenderPD() { ++ return true; + } + -+ RegSet &operator+=(const RegSet aSet) { -+ *this = *this + aSet; -+ return *this; ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); + } + -+ RegSet &operator-=(const RegSet aSet) { -+ *this = *this - aSet; -+ return *this; ++ public Address getLink() { ++ try { ++ if (DEBUG) { ++ System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) ++ + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); ++ } ++ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); ++ } catch (Exception e) { ++ if (DEBUG) ++ System.out.println("Returning null"); ++ return null; ++ } ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ // return address of param, zero origin index. ++ public Address getNativeParamAddr(int idx) { ++ return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); + } + -+ static RegSet of(Register r1) { -+ return RegSet(r1); ++ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); + } + -+ static RegSet of(Register r1, Register r2) { -+ return of(r1) + r2; ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); + } + -+ static RegSet of(Register r1, Register r2, Register r3) { -+ return of(r1, r2) + r3; ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. ++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); + } + -+ static RegSet of(Register r1, Register r2, Register r3, Register r4) { -+ return of(r1, r2, r3) + r4; ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); + } + -+ static RegSet range(Register start, Register end) { -+ uint32_t bits = ~0; -+ bits <<= start->encoding(); -+ bits <<= 31 - end->encoding(); -+ bits >>= 31 - end->encoding(); ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) + -+ return RegSet(bits); ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); + } + -+ uint32_t bits() const { return _bitset; } ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } + -+private: ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } + -+ Register first() { -+ uint32_t first = _bitset & -_bitset; -+ return first ? 
as_Register(exact_log2(first)) : noreg; ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); + } -+}; - - #endif // CPU_RISCV_REGISTER_RISCV_HPP - -From 9c85aa8d3387d795f9c2f4795ffc7f9d7f814d92 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 19:24:49 +0800 -Subject: [PATCH 103/140] Revert JDK-8240363: Refactor Compile::Output() to its - own Phase - ---- - .../cpu/riscv/macroAssembler_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/riscv.ad | 20 +++++++++---------- - 2 files changed, 11 insertions(+), 11 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index e2841c28c37..656334f326b 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -3027,7 +3027,7 @@ address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { - CompileTask* task = ciEnv::current()->task(); - in_scratch_emit_size = - (task != NULL && is_c2_compile(task->comp_level()) && -- Compile::current()->output()->in_scratch_emit_size()); -+ Compile::current()->in_scratch_emit_size()); - #endif - if (!in_scratch_emit_size) { - address stub = emit_trampoline_stub(offset(), entry.target()); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index c5e0ae23029..d736750d02d 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1029,7 +1029,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { - //============================================================================= - const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; - --int ConstantTable::calculate_table_base_offset() const { -+int Compile::ConstantTable::calculate_table_base_offset() const { - return 0; // absolute addressing, no offset - } - -@@ -1058,9 +1058,9 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - assert_cond(st != NULL && ra_ != NULL); - Compile* C = ra_->C; - -- int framesize = C->output()->frame_slots() << LogBytesPerInt; -+ int framesize = C->frame_slots() << LogBytesPerInt; - -- if (C->output()->need_stack_bang(framesize)) { -+ if (C->need_stack_bang(framesize)) { - st->print("# stack bang size=%d\n\t", framesize); - } - -@@ -1077,7 +1077,7 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - MacroAssembler _masm(&cbuf); - - // n.b. 
frame size includes space for return pc and fp -- const int framesize = C->output()->frame_size_in_bytes(); -+ const int framesize = C->frame_size_in_bytes(); - - // insert a nop at the start of the prolog so we can patch in a - // branch if we need to invalidate the method later -@@ -1085,8 +1085,8 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - - assert_cond(C != NULL); - -- int bangsize = C->output()->bang_size_in_bytes(); -- if (C->output()->need_stack_bang(bangsize)) { -+ int bangsize = C->bang_size_in_bytes(); -+ if (C->need_stack_bang(bangsize)) { - __ generate_stack_overflow_check(bangsize); - } - -@@ -1096,12 +1096,12 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - Unimplemented(); - } - -- C->output()->set_frame_complete(cbuf.insts_size()); -+ C->set_frame_complete(cbuf.insts_size()); - - if (C->has_mach_constant_base_node()) { - // NOTE: We set the table base offset here because users might be - // emitted before MachConstantBaseNode. -- ConstantTable& constant_table = C->output()->constant_table(); -+ Compile::ConstantTable& constant_table = C->constant_table(); - constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); - } - } -@@ -1125,7 +1125,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - assert_cond(st != NULL && ra_ != NULL); - Compile* C = ra_->C; - assert_cond(C != NULL); -- int framesize = C->output()->frame_size_in_bytes(); -+ int framesize = C->frame_size_in_bytes(); - - st->print("# pop frame %d\n\t", framesize); - -@@ -1152,7 +1152,7 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - Compile* C = ra_->C; - MacroAssembler _masm(&cbuf); - assert_cond(C != NULL); -- int framesize = C->output()->frame_size_in_bytes(); -+ int framesize = C->frame_size_in_bytes(); - - __ remove_frame(framesize); - - -From 3a58114310a56ebca04ba44b4883d205096eb844 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 19:36:09 +0800 -Subject: [PATCH 104/140] Revert RotateLeft && RotateRight matching rules - ---- - src/hotspot/cpu/riscv/riscv.ad | 2 - - src/hotspot/cpu/riscv/riscv_b.ad | 76 -------------------------------- - 2 files changed, 78 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index d736750d02d..1e6495692da 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1505,8 +1505,6 @@ const bool Matcher::match_rule_supported(int opcode) { - case Op_PopCountL: - return UsePopCountInstruction; - -- case Op_RotateRight: -- case Op_RotateLeft: - case Op_CountLeadingZerosI: - case Op_CountLeadingZerosL: - case Op_CountTrailingZerosI: -diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad -index 4488c1c4031..b9e04c432e1 100644 ---- a/src/hotspot/cpu/riscv/riscv_b.ad -+++ b/src/hotspot/cpu/riscv/riscv_b.ad -@@ -25,82 +25,6 @@ - - // RISCV Bit-Manipulation Extension Architecture Description File - --instruct rorI_imm_rvb(iRegINoSp dst, iRegI src, immI shift) %{ -- predicate(UseRVB); -- match(Set dst (RotateRight src shift)); -- -- format %{ "roriw $dst, $src, ($shift & 0x1f)\t#@rorI_imm_rvb" %} -- -- ins_cost(ALU_COST); -- ins_encode %{ -- __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x1f); -- %} -- -- ins_pipe(ialu_reg_shift); --%} -- --instruct rorL_imm_rvb(iRegLNoSp dst, iRegL src, immI shift) %{ -- predicate(UseRVB); -- match(Set dst (RotateRight src shift)); -- -- format %{ "rori $dst, $src, 
($shift & 0x3f)\t#@rorL_imm_rvb" %} -- -- ins_cost(ALU_COST); -- ins_encode %{ -- __ rori(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x3f); -- %} -- -- ins_pipe(ialu_reg_shift); --%} -- --instruct rorI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ -- predicate(UseRVB); -- match(Set dst (RotateRight src shift)); -- -- format %{ "rorw $dst, $src, $shift\t#@rorI_reg_rvb" %} -- ins_cost(ALU_COST); -- ins_encode %{ -- __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -- %} -- ins_pipe(ialu_reg_reg); --%} -- --instruct rorL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ -- predicate(UseRVB); -- match(Set dst (RotateRight src shift)); -- -- format %{ "ror $dst, $src, $shift\t#@rorL_reg_rvb" %} -- ins_cost(ALU_COST); -- ins_encode %{ -- __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -- %} -- ins_pipe(ialu_reg_reg); --%} -- --instruct rolI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ -- predicate(UseRVB); -- match(Set dst (RotateLeft src shift)); -- -- format %{ "rolw $dst, $src, $shift\t#@rolI_reg_rvb" %} -- ins_cost(ALU_COST); -- ins_encode %{ -- __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -- %} -- ins_pipe(ialu_reg_reg); --%} -- --instruct rolL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ -- predicate(UseRVB); -- match(Set dst (RotateLeft src shift)); -- -- format %{ "rol $dst, $src, $shift\t#@rolL_reg_rvb" %} -- ins_cost(ALU_COST); -- ins_encode %{ -- __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -- %} -- ins_pipe(ialu_reg_reg); --%} -- - // Convert oop into int for vectors alignment masking - instruct convP2I_rvb(iRegINoSp dst, iRegP src) %{ - predicate(UseRVB); - -From 21577388eda0218eeb4b28bc71ecf5737d40639e Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 19:49:28 +0800 -Subject: [PATCH 105/140] Revert JDK-8230565: ZGC: Redesign C2 load barrier to - expand on the MachNode level - ---- - src/hotspot/cpu/riscv/riscv.ad | 14 ++++---------- - 1 file changed, 4 insertions(+), 10 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 1e6495692da..533eaf843e3 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -4324,7 +4324,6 @@ instruct loadRange(iRegINoSp dst, memory mem) - instruct loadP(iRegPNoSp dst, memory mem) - %{ - match(Set dst (LoadP mem)); -- predicate(n->as_Load()->barrier_data() == 0); - - ins_cost(LOAD_COST); - format %{ "ld $dst, $mem\t# ptr, #@loadP" %} -@@ -5060,8 +5059,6 @@ instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoS - - instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) - %{ -- predicate(n->as_LoadStore()->barrier_data() == 0); -- - match(Set res (CompareAndSwapP mem (Binary oldval newval))); - - ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); -@@ -5181,7 +5178,7 @@ instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegL - - instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) - %{ -- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); -+ predicate(needs_acquiring_load_reserved(n)); - - match(Set res (CompareAndSwapP mem (Binary oldval newval))); - -@@ -5327,7 +5324,6 @@ instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN ne - - instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP 
oldval, iRegP newval) - %{ -- predicate(n->as_LoadStore()->barrier_data() == 0); - match(Set res (CompareAndExchangeP mem (Binary oldval newval))); - - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); -@@ -5462,7 +5458,7 @@ instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN - - instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) - %{ -- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); -+ predicate(needs_acquiring_load_reserved(n)); - - match(Set res (CompareAndExchangeP mem (Binary oldval newval))); - -@@ -5592,7 +5588,6 @@ instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN ne - - instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) - %{ -- predicate(n->as_LoadStore()->barrier_data() == 0); - match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); - - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); -@@ -5731,7 +5726,7 @@ instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN - - instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) - %{ -- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); -+ predicate(needs_acquiring_load_reserved(n)); - - match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); - -@@ -5798,7 +5793,6 @@ instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) - - instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) - %{ -- predicate(n->as_LoadStore()->barrier_data() == 0); - match(Set prev (GetAndSetP mem newv)); - - ins_cost(ALU_COST); -@@ -5865,7 +5859,7 @@ instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) - - instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) - %{ -- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); -+ predicate(needs_acquiring_load_reserved(n)); - - match(Set prev (GetAndSetP mem newv)); - - -From 4673921af60f4779d4322256f92bb60a850cb035 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 19:51:09 +0800 -Subject: [PATCH 106/140] Revert JDK-8252990: Intrinsify Unsafe.storeStoreFence - ---- - src/hotspot/cpu/riscv/riscv.ad | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 533eaf843e3..5fa3b85c001 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -7537,7 +7537,6 @@ instruct membar_release() %{ - - instruct membar_storestore() %{ - match(MemBarStoreStore); -- match(StoreStoreFence); - ins_cost(ALU_COST); - - format %{ "MEMBAR-store-store\t#@membar_storestore" %} - -From e254a03e87ffc6d8f563dbd7db1b607a95657263 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 19:54:02 +0800 -Subject: [PATCH 107/140] Revert JDK-8255150: Add utility methods to check long - indexes and ranges && JDK-8252372: Check if cloning is required to move loads - out of loops in PhaseIdealLoop::split_if_with_blocks_post() - ---- - src/hotspot/cpu/riscv/riscv.ad | 33 --------------------------------- - 1 file changed, 33 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 5fa3b85c001..388e65f623d 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -7621,17 +7621,6 @@ instruct castPP(iRegPNoSp dst) - ins_pipe(pipe_class_empty); - %} - --instruct castLL(iRegL dst) 
--%{ -- match(Set dst (CastLL dst)); -- -- size(0); -- format %{ "# castLL of $dst, #@castLL" %} -- ins_encode(/* empty encoding */); -- ins_cost(0); -- ins_pipe(pipe_class_empty); --%} -- - instruct castII(iRegI dst) - %{ - match(Set dst (CastII dst)); -@@ -7654,28 +7643,6 @@ instruct checkCastPP(iRegPNoSp dst) - ins_pipe(pipe_class_empty); - %} - --instruct castFF(fRegF dst) --%{ -- match(Set dst (CastFF dst)); -- -- size(0); -- format %{ "# castFF of $dst" %} -- ins_encode(/* empty encoding */); -- ins_cost(0); -- ins_pipe(pipe_class_empty); --%} -- --instruct castDD(fRegD dst) --%{ -- match(Set dst (CastDD dst)); -- -- size(0); -- format %{ "# castDD of $dst" %} -- ins_encode(/* empty encoding */); -- ins_cost(0); -- ins_pipe(pipe_class_empty); --%} -- - // ============================================================================ - // Convert Instructions - - -From 2c1820363992d09ef0cd2ed2553c04e0f7afd91f Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 20:02:14 +0800 -Subject: [PATCH 108/140] Revert reset_label part of JDK-8248411: [aarch64] - Insufficient error handling when CodeBuffer is exhausted - ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 14 +++++--------- - 2 files changed, 6 insertions(+), 10 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 656334f326b..37ccf132986 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -3784,7 +3784,7 @@ address MacroAssembler::zero_words(Register ptr, Register cnt) - if (StubRoutines::riscv::complete()) { - address tpc = trampoline_call(zero_blocks); - if (tpc == NULL) { -- DEBUG_ONLY(reset_labels(around)); -+ DEBUG_ONLY(reset_labels1(around)); - postcond(pc() == badAddress); - return NULL; - } -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 953bca3cbd8..45ffc663963 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -815,17 +815,13 @@ class MacroAssembler: public Assembler { - private: - - #ifdef ASSERT -- // Template short-hand support to clean-up after a failed call to trampoline -+ // Macro short-hand support to clean-up after a failed call to trampoline - // call generation (see trampoline_call() below), when a set of Labels must - // be reset (before returning). -- template -- void reset_labels(Label& lbl, More&... 
more) { -- lbl.reset(); reset_labels(more...); -- } -- template -- void reset_labels(Label& lbl) { -- lbl.reset(); -- } -+#define reset_labels1(L1) L1.reset() -+#define reset_labels2(L1, L2) L1.reset(); L2.reset() -+#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) -+#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) - #endif - void repne_scan(Register addr, Register value, Register count, Register tmp); - - -From 014972a0778b8c5568fae9e92d286b634cb44674 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 20:30:49 +0800 -Subject: [PATCH 109/140] Revert JDK-8242289: C2: Support platform-specific - node cloning in Matcher - ---- - src/hotspot/cpu/riscv/riscv.ad | 12 +----------- - 1 file changed, 1 insertion(+), 11 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 388e65f623d..7cd6c2995ba 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1765,20 +1765,10 @@ bool size_fits_all_mem_uses(AddPNode* addp, int shift) { - - const bool Matcher::convi2l_type_required = false; - --// Should the Matcher clone input 'm' of node 'n'? --bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { -- assert_cond(m != NULL); -- if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) -- mstack.push(m, Visit); // m = ShiftCntV -- return true; -- } -- return false; --} -- - // Should the Matcher clone shifts on addressing modes, expecting them - // to be subsumed into complex addressing expressions or compute them - // into registers? --bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { -+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { - return clone_base_plus_offset_address(m, mstack, address_visited); - } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new RISCV64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ AddressOps.lt(addr, getSP()); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ System.out.println("-----------------------"); ++ for (Address addr = getSP(); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java +new file mode 100644 +index 0000000000..850758a7ed +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.riscv64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RISCV64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public RISCV64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java +new file mode 100644 +index 0000000000..4aeb1c6f55 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class RISCV64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public RISCV64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected RISCV64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ RISCV64RegisterMap retval = new RISCV64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +index 7d7a6107ca..6552ce255f 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -54,7 +54,7 @@ public class PlatformInfo { - -From d15e155e9b84f4789cfbb1cf75382be859b0a8ca Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 20:40:00 +0800 -Subject: [PATCH 110/140] Revert JDK-8255782: Turn UseTLAB and ResizeTLAB from - product_pd to product, defaulting to "true" - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 2 ++ - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 ++ - 2 files changed, 4 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index 8f2f4e0e81d..25e00bea901 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -34,6 +34,8 @@ + public static boolean knownCPU(String cpu) { + final String[] KNOWN = +- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; ++ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "riscv64"}; - #ifndef TIERED - define_pd_global(bool, BackgroundCompilation, true ); -+define_pd_global(bool, UseTLAB, true ); -+define_pd_global(bool, ResizeTLAB, true ); - define_pd_global(bool, InlineIntrinsics, true ); - define_pd_global(bool, PreferInterpreterNativeStubs, false); - define_pd_global(bool, ProfileTraps, false); -diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -index 33d78fb2f6f..3da1f1c6d86 100644 ---- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -33,6 +33,8 @@ - // (see c2_globals.hpp). Alpha-sorted. + for(String s : KNOWN) { + if(s.equals(cpu)) +diff --git a/test/hotspot/jtreg/compiler/c2/TestBit.java b/test/hotspot/jtreg/compiler/c2/TestBit.java +index 7805918c28..823b9f39db 100644 +--- a/test/hotspot/jtreg/compiler/c2/TestBit.java ++++ b/test/hotspot/jtreg/compiler/c2/TestBit.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -34,7 +34,7 @@ import jdk.test.lib.process.ProcessTools; + * + * @run driver compiler.c2.TestBit + * +- * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" ++ * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" | os.arch == "riscv64" + * @requires vm.debug == true & vm.compiler2.enabled + */ + public class TestBit { +@@ -54,7 +54,8 @@ public class TestBit { + String expectedTestBitInstruction = + "ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" : + "aarch64".equals(System.getProperty("os.arch")) ? "tb" : +- "amd64".equals(System.getProperty("os.arch")) ? "test" : null; ++ "amd64".equals(System.getProperty("os.arch")) ? "test" : ++ "riscv64".equals(System.getProperty("os.arch")) ? "andi" : null; - define_pd_global(bool, BackgroundCompilation, true); -+define_pd_global(bool, UseTLAB, true); -+define_pd_global(bool, ResizeTLAB, true); - define_pd_global(bool, CICompileOSR, true); - define_pd_global(bool, InlineIntrinsics, true); - define_pd_global(bool, PreferInterpreterNativeStubs, false); - -From f3fa0cfa987743b4ee83332ddf71add421561908 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 20:49:57 +0800 -Subject: [PATCH 111/140] Revert JDK-8265245: depChecker_ don't have any - functionalities - ---- - src/hotspot/cpu/riscv/depChecker_riscv.hpp | 32 ++++++++++++++++++++++ - 1 file changed, 32 insertions(+) - create mode 100644 src/hotspot/cpu/riscv/depChecker_riscv.hpp - -diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp + if (expectedTestBitInstruction != null) { + output.shouldContain(expectedTestBitInstruction); +diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java new file mode 100644 -index 00000000000..e9ff307b647 +index 0000000000..5a1b659bbe --- /dev/null -+++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp -@@ -0,0 +1,32 @@ ++++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +@@ -0,0 +1,80 @@ +/* -+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Alibaba Group Holding Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -80560,1450 +56639,228 @@ index 00000000000..e9ff307b647 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. ++ */ ++ ++/* ++ * @test ++ * @summary Test libm intrinsics ++ * @library /test/lib / + * ++ * @build sun.hotspot.WhiteBox ++ * @run driver ClassFileInstaller sun.hotspot.WhiteBox ++ * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI ++ * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement ++ * compiler.floatingpoint.TestLibmIntrinsics + */ + -+#ifndef CPU_RISCV_VM_DEPCHECKER_RISCV_HPP -+#define CPU_RISCV_VM_DEPCHECKER_RISCV_HPP ++package compiler.floatingpoint; + -+// Nothing to do on riscv ++import compiler.whitebox.CompilerWhiteBoxTest; ++import sun.hotspot.WhiteBox; + -+#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP - -From 97a3d4d3b98a450aa316eaa94103cf8473d12d50 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 20:58:34 +0800 -Subject: [PATCH 112/140] Revert JDK-8241438: Move IntelJccErratum mitigation - code to platform-specific code - ---- - src/hotspot/cpu/riscv/riscv.ad | 18 ------------------ - 1 file changed, 18 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 7cd6c2995ba..fc6823daf8b 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -740,13 +740,6 @@ class HandlerImpl { - } - }; - --class Node::PD { --public: -- enum NodeFlags { -- _last_flag = Node::_last_flag -- }; --}; -- - bool is_CAS(int opcode, bool maybe_volatile); - - // predicate controlling translation of CompareAndSwapX -@@ -805,17 +798,6 @@ void reg_mask_init() { - } - } - --void PhaseOutput::pd_perform_mach_node_analysis() { --} -- --int MachNode::pd_alignment_required() const { -- return 1; --} -- --int MachNode::compute_padding(int current_offset) const { -- return 0; --} -- - // is_CAS(int opcode, bool maybe_volatile) - // - // return true if opcode is one of the possible CompareAndSwapX - -From 8a3e7b81b79918a4f2feb4d9226ab8be6c43c28a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:03:47 +0800 -Subject: [PATCH 113/140] Revert JDK-8260355: AArch64: deoptimization stub - should save vector registers - ---- - src/hotspot/cpu/riscv/registerMap_riscv.cpp | 45 --------------------- - src/hotspot/cpu/riscv/registerMap_riscv.hpp | 1 - - 2 files changed, 46 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.cpp - -diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.cpp b/src/hotspot/cpu/riscv/registerMap_riscv.cpp -deleted file mode 100644 -index 26c1edc36ff..00000000000 ---- a/src/hotspot/cpu/riscv/registerMap_riscv.cpp -+++ /dev/null -@@ -1,45 +0,0 @@ --/* -- * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#include "precompiled.hpp" --#include "runtime/registerMap.hpp" --#include "vmreg_riscv.inline.hpp" -- --address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const { -- if (base_reg->is_VectorRegister()) { -- assert(base_reg->is_concrete(), "must pass base reg"); -- int base_reg_enc = (base_reg->value() - ConcreteRegisterImpl::max_fpr) / -- VectorRegisterImpl::max_slots_per_register; -- intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size; -- address base_location = location(base_reg); -- if (base_location != NULL) { -- return base_location + offset_in_bytes; -- } else { -- return NULL; -- } -- } else { -- return location(base_reg->next(slot_idx)); -- } --} -diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp -index f34349811a9..fef8ca9b64e 100644 ---- a/src/hotspot/cpu/riscv/registerMap_riscv.hpp -+++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp -@@ -33,7 +33,6 @@ - // This is the hook for finding a register in an "well-known" location, - // such as a register block of a predetermined format. - address pd_location(VMReg reg) const { return NULL; } -- address pd_location(VMReg base_reg, int slot_idx) const; - - // no PD state to clear or copy: - void pd_clear() {} - -From 5fc20f93a312f9189b55c5236c15a55b3da10cf9 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:05:37 +0800 -Subject: [PATCH 114/140] Revert JDK-8250914: Matcher::stack_direction() is - unused - ---- - src/hotspot/cpu/riscv/riscv.ad | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index fc6823daf8b..c21508b6e7c 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2326,6 +2326,9 @@ encode %{ - // SP meets the minimum alignment. - - frame %{ -+ // What direction does stack grow in (assumed to be same for C & Java) -+ stack_direction(TOWARDS_LOW); ++import java.lang.reflect.Method; + - // These three registers define part of the calling convention - // between compiled code and the interpreter. 
- - -From aab3322fd2507a3aeae39c69ba871400dd342834 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:15:45 +0800 -Subject: [PATCH 115/140] Revert CacheWB*Node matching rules - ---- - src/hotspot/cpu/riscv/riscv.ad | 8 -------- - 1 file changed, 8 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index c21508b6e7c..e410bd06aa6 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1475,14 +1475,6 @@ const bool Matcher::match_rule_supported(int opcode) { - } - - switch (opcode) { -- case Op_CacheWB: // fall through -- case Op_CacheWBPreSync: // fall through -- case Op_CacheWBPostSync: -- if (!VM_Version::supports_data_cache_line_flush()) { -- return false; -- } -- break; -- - case Op_PopCountI: - case Op_PopCountL: - return UsePopCountInstruction; - -From 705981aaff19b442b55df8a038aab9c61133bc3a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:21:10 +0800 -Subject: [PATCH 116/140] Revert JDK-8263595: Remove oop type punning in - JavaCallArguments - ---- - src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -index bc4e5758256..df3c0267eea 100644 ---- a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -+++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -@@ -65,8 +65,9 @@ class JNITypes : private AllStatic { - } - - // Oops are stored in native format in one JavaCallArgument slot at *to. -- static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); } -- static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; } -+ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } -+ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } -+ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } - - // Floats are stored in native format in one JavaCallArgument slot at *to. - static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } - -From bba22725b9f1386d8899941ccee3e8dc7f9a4a6f Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:33:01 +0800 -Subject: [PATCH 117/140] Revert JDK-8260012: Reduce inclusion of - collectedHeap.hpp and heapInspection.hpp - ---- - src/hotspot/cpu/riscv/frame_riscv.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 40ec584b994..d4fcbdcbbde 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -598,7 +598,7 @@ BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) - oop* obj_p = (oop*)tos_addr; - obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; - } -- assert(Universe::is_in_heap_or_null(obj), "sanity check"); -+ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); - *oop_result = obj; - break; - } - -From 49000a43408aba29d3dc9ee4e03219e6f85be602 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:35:21 +0800 -Subject: [PATCH 118/140] Revert JDK-8271869: AArch64: build errors with GCC11 - in frame::saved_oop_result - ---- - src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -index 5ac1bf57f57..abd5bda7e49 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -@@ -230,8 +230,6 @@ inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { - - - // Compiled frames --PRAGMA_DIAG_PUSH --PRAGMA_NONNULL_IGNORED - inline oop frame::saved_oop_result(RegisterMap* map) const { - oop* result_adr = (oop *)map->location(x10->as_VMReg()); - guarantee(result_adr != NULL, "bad register save location"); -@@ -243,6 +241,5 @@ inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { - guarantee(result_adr != NULL, "bad register save location"); - *result_adr = obj; - } --PRAGMA_DIAG_POP - - #endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP - -From 14a46a85e65f6fec09ac566d49a6232216881adb Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:40:43 +0800 -Subject: [PATCH 119/140] Revert JDK-8230392: Define AArch64 as - MULTI_COPY_ATOMIC - ---- - src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -index ffd420da024..606f0fa0da3 100644 ---- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -33,10 +33,6 @@ const int StackAlignmentInBytes = 16; - // 32-bit integer argument values are extended to 64 bits. - const bool CCallingConventionRequiresIntsAsLongs = false; - --// RISCV has adopted a multicopy atomic model closely following --// that of ARMv8. --#define CPU_MULTI_COPY_ATOMIC -- - // To be safe, we deoptimize when we come across an access that needs - // patching. This is similar to what is done on aarch64. 
- #define DEOPTIMIZE_WHEN_PATCHING - -From 8740928267a831c62f1deb20c910e3c27716bc40 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:42:20 +0800 -Subject: [PATCH 120/140] Revert: JDK-8246689: Enable independent compressed - oops/class ptrs on Aarch64 JDK-8241825: Make compressed oops and compressed - class pointers independent (x86_64, PPC, S390) - ---- - src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -index 606f0fa0da3..acdf75d324e 100644 ---- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -41,6 +41,4 @@ const bool CCallingConventionRequiresIntsAsLongs = false; - - #define SUPPORT_RESERVED_STACK_AREA - --#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false -- - #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP - -From 94b40f4efccc19c8ac66eda6c57381a222b02d2d Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:50:49 +0800 -Subject: [PATCH 121/140] Revert JDK-8222637: Obsolete NeedsDeoptSuspend - ---- - src/hotspot/cpu/riscv/globals_riscv.hpp | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index b78f258a764..a838a377829 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -32,6 +32,8 @@ - // Sets the default values for platform dependent flags used by the runtime system. - // (see globals.hpp) - -+define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++public class TestLibmIntrinsics { + - define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks - define_pd_global(bool, TrapBasedNullChecks, false); - define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast - -From 09968c9fc102fd32bc628d3e6fd9d9adcbec4373 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:52:44 +0800 -Subject: [PATCH 122/140] Revert JDK-8220051: Remove global safepoint code - ---- - src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -index acdf75d324e..d6ce8da07b8 100644 ---- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -41,4 +41,6 @@ const bool CCallingConventionRequiresIntsAsLongs = false; - - #define SUPPORT_RESERVED_STACK_AREA - -+#define THREAD_LOCAL_POLL ++ private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); + - #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP - -From 2f4fb2b5ac420d456421592dc09b81244636ba4d Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 22:00:52 +0800 -Subject: [PATCH 123/140] Revert JDK-8272873: C2: Inlining should not depend on - absolute call site counts - ---- - src/hotspot/cpu/riscv/globals_riscv.hpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index a838a377829..b4f71c45ec1 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -41,6 +41,7 @@ define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs - define_pd_global(uintx, 
CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. - define_pd_global(intx, CodeEntryAlignment, 64); - define_pd_global(intx, OptoLoopAlignment, 16); -+define_pd_global(intx, InlineFrequencyCount, 100); - - #define DEFAULT_STACK_YELLOW_PAGES (2) - #define DEFAULT_STACK_RED_PAGES (1) - -From 2df3625eea16fc0d45c0e4cf12c9433f0ec070fd Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 22:02:13 +0800 -Subject: [PATCH 124/140] Revert JDK-8220049: Obsolete ThreadLocalHandshakes - ---- - src/hotspot/cpu/riscv/globals_riscv.hpp | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index b4f71c45ec1..b7d85373c4a 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -76,6 +76,8 @@ define_pd_global(bool, CompactStrings, true); - // Clear short arrays bigger than one word in an arch-specific way - define_pd_global(intx, InitArrayShortSize, BytesPerLong); - -+define_pd_global(bool, ThreadLocalHandshakes, true); ++ private static final double pi = 3.1415926; + - define_pd_global(intx, InlineSmallCode, 1000); - - #define ARCH_FLAGS(develop, \ - -From a875c4caa423dd727cea1c891b17f4ded97e57d1 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 22:04:32 +0800 -Subject: [PATCH 125/140] Revert: JDK-8243208: Clean up JVMFlag implementation - JDK-8236625: Remove writeable macro from JVM flags declaration - ---- - src/hotspot/cpu/riscv/globals_riscv.hpp | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index b7d85373c4a..0becd9efd35 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -82,9 +82,12 @@ define_pd_global(intx, InlineSmallCode, 1000); - - #define ARCH_FLAGS(develop, \ - product, \ -+ diagnostic, \ -+ experimental, \ - notproduct, \ - range, \ -- constraint) \ -+ constraint, \ -+ writeable) \ - \ - product(bool, NearCpool, true, \ - "constant pool is close to instructions") \ - -From 19a9e6e8c3dba77cf8be0f25b1aec394aeca0b25 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Wed, 10 May 2023 09:44:12 +0800 -Subject: [PATCH 126/140] Revert JDK-8213436: Obsolete UseMembar && - JDK-8188764: Obsolete AssumeMP and then remove all support for non-MP builds, - always enabled - ---- - src/hotspot/cpu/riscv/globals_riscv.hpp | 2 ++ - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 2 ++ - 2 files changed, 4 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index 0becd9efd35..e820898d87f 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -64,6 +64,8 @@ define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); - define_pd_global(bool, RewriteBytecodes, true); - define_pd_global(bool, RewriteFrequentPairs, true); - -+define_pd_global(bool, UseMembar, true); ++ private static final double expected = 2.5355263553695413; + - define_pd_global(bool, PreserveFramePointer, false); - - // GC Ergo Flags -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -index 50ee7edb708..f13e4269b77 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -139,6 +139,8 @@ void VM_Version::initialize() { - #endif // COMPILER2 - - 
UNSUPPORTED_OPTION(CriticalJNINatives); ++ static double m() { ++ return Math.pow(pi, Math.sin(Math.cos(Math.tan(Math.log(Math.log10(Math.exp(pi))))))); ++ } + -+ FLAG_SET_DEFAULT(UseMembar, true); - } - - #ifdef COMPILER2 - -From 0c4a9d1b6b3b3b31a1c105ff311414ae542764bb Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Mon, 1 May 2023 16:04:15 +0800 -Subject: [PATCH 127/140] Misc adaptations to jdk11u - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 2 +- - .../linux_riscv/vm_version_linux_riscv.cpp | 16 ++++++++-------- - 2 files changed, 9 insertions(+), 9 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index 25e00bea901..9316d4be02e 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -57,7 +57,7 @@ define_pd_global(uintx, CodeCacheMinBlockLength, 1); - define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); - define_pd_global(uintx, MetaspaceSize, 12*M ); - define_pd_global(bool, NeverActAsServerClassMachine, true ); --define_pd_global(uint64_t, MaxRAM, 1ULL*G); -+define_pd_global(uint64_t, MaxRAM, 1ULL*G); - define_pd_global(bool, CICompileOSR, true ); - #endif // !TIERED - define_pd_global(bool, UseTypeProfile, false); -diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp -index 4623dbfad42..60260854db6 100644 ---- a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp -@@ -83,14 +83,14 @@ void VM_Version::get_os_cpu_info() { - - uint64_t auxv = getauxval(AT_HWCAP); - -- static_assert(CPU_I == HWCAP_ISA_I, "Flag CPU_I must follow Linux HWCAP"); -- static_assert(CPU_M == HWCAP_ISA_M, "Flag CPU_M must follow Linux HWCAP"); -- static_assert(CPU_A == HWCAP_ISA_A, "Flag CPU_A must follow Linux HWCAP"); -- static_assert(CPU_F == HWCAP_ISA_F, "Flag CPU_F must follow Linux HWCAP"); -- static_assert(CPU_D == HWCAP_ISA_D, "Flag CPU_D must follow Linux HWCAP"); -- static_assert(CPU_C == HWCAP_ISA_C, "Flag CPU_C must follow Linux HWCAP"); -- static_assert(CPU_V == HWCAP_ISA_V, "Flag CPU_V must follow Linux HWCAP"); -- static_assert(CPU_B == HWCAP_ISA_B, "Flag CPU_B must follow Linux HWCAP"); -+ STATIC_ASSERT(CPU_I == HWCAP_ISA_I); -+ STATIC_ASSERT(CPU_M == HWCAP_ISA_M); -+ STATIC_ASSERT(CPU_A == HWCAP_ISA_A); -+ STATIC_ASSERT(CPU_F == HWCAP_ISA_F); -+ STATIC_ASSERT(CPU_D == HWCAP_ISA_D); -+ STATIC_ASSERT(CPU_C == HWCAP_ISA_C); -+ STATIC_ASSERT(CPU_V == HWCAP_ISA_V); -+ STATIC_ASSERT(CPU_B == HWCAP_ISA_B); - _features = auxv & ( - HWCAP_ISA_I | - HWCAP_ISA_M | - -From 4ce5e05526029360ad15eb9639c9c05fac77ac8e Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 20 May 2023 17:51:52 +0800 -Subject: [PATCH 128/140] Save all call-clobbered registers for spark tests may - crash - ---- - .../cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 10 ++-------- - 1 file changed, 2 insertions(+), 8 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -index bc847388f68..e191cbcee2a 100644 ---- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -157,21 +157,15 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, - __ j(done); - - __ bind(runtime); -- // save the live input values -- RegSet saved = RegSet::of(pre_val); -- if 
(tosca_live) { saved += RegSet::of(x10); } -- if (obj != noreg) { saved += RegSet::of(obj); } -- -- __ push_reg(saved, sp); - -+ __ push_call_clobbered_registers(); - if (expand_call) { - assert(pre_val != c_rarg1, "smashed arg"); - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); - } else { - __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); - } -- -- __ pop_reg(saved, sp); -+ __ pop_call_clobbered_registers(); - - __ bind(done); - - -From 1b8778b0831571e9ac688bbd22afca4cf8f62407 Mon Sep 17 00:00:00 2001 -From: Kuai Wei -Date: Tue, 22 Aug 2023 16:17:31 +0800 -Subject: [PATCH 129/140] Build with gcc 13 - ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 1 + - src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp | 1 + - 2 files changed, 2 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 37ccf132986..fd18bb77058 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -49,6 +49,7 @@ - #include "runtime/thread.hpp" - #ifdef COMPILER2 - #include "opto/compile.hpp" -+#include "opto/intrinsicnode.hpp" - #include "opto/node.hpp" - #include "opto/output.hpp" - #endif -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -index 31d9254d8ad..ccceed643ed 100644 ---- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -@@ -24,6 +24,7 @@ - */ - - #include "precompiled.hpp" -+#include "memory/metaspaceShared.hpp" - #include "runtime/frame.inline.hpp" - #include "runtime/thread.inline.hpp" - - -From 4c23be6665aec94462e82e3b4adcf7abb5b23981 Mon Sep 17 00:00:00 2001 -From: Kuai Wei -Date: Tue, 5 Sep 2023 15:37:43 +0800 -Subject: [PATCH 130/140] Fix copyright information - ---- - make/autoconf/build-aux/config.guess | 2 +- - .../MyPackage/HeapMonitorEventsForTwoThreadsTest.java | 1 + - 2 files changed, 2 insertions(+), 1 deletion(-) - -diff --git a/make/autoconf/build-aux/config.guess b/make/autoconf/build-aux/config.guess -index 15111d827ab..a88a9adec3f 100644 ---- a/make/autoconf/build-aux/config.guess -+++ b/make/autoconf/build-aux/config.guess -@@ -1,6 +1,6 @@ - #!/bin/sh - # --# Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. -+# Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights reserved. - # Copyright (c) 2021, Azul Systems, Inc. All rights reserved. - # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- # -diff --git a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java -index f0b7aed5ceb..54640b245f8 100644 ---- a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java -+++ b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java -@@ -1,4 +1,5 @@ ++ static public void main(String[] args) throws NoSuchMethodException { ++ Method test_method = compiler.floatingpoint.TestLibmIntrinsics.class.getDeclaredMethod("m"); ++ ++ double interpreter_result = m(); ++ ++ // Compile with C1 if possible ++ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE); ++ ++ double c1_result = m(); ++ ++ WHITE_BOX.deoptimizeMethod(test_method); ++ ++ // Compile it with C2 if possible ++ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION); ++ ++ double c2_result = m(); ++ ++ if (interpreter_result != c1_result || ++ interpreter_result != c2_result || ++ c1_result != c2_result) { ++ System.out.println("interpreter = " + interpreter_result + " c1 = " + c1_result + " c2 = " + c2_result); ++ throw new RuntimeException("Test Failed"); ++ } ++ } ++} +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java +index 558b4218f0..55374b116e 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java +@@ -1,5 +1,5 @@ /* -+ * Copyright (c) 2018, Google and/or its affiliates. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it - -From 70a060f73c3617e58f881bcee19f1a3ce43f54ff Mon Sep 17 00:00:00 2001 -From: Chris Plummer -Date: Thu, 2 Jul 2020 13:13:10 -0700 -Subject: [PATCH 131/140] 8247533: SA stack walking sometimes fails with - sun.jvm.hotspot.debugger.DebuggerException: get_thread_regs failed for a lwp - -Reviewed-by: sspitsyn, ysuenaga, dtitov ---- - .../native/libsaproc/LinuxDebuggerLocal.c | 8 ++++++- - .../linux/native/libsaproc/ps_proc.c | 3 ++- - .../native/libsaproc/MacosxDebuggerLocal.m | 24 ++++++++++++------- - .../debugger/bsd/BsdDebuggerLocal.java | 2 +- - .../jvm/hotspot/debugger/bsd/BsdThread.java | 10 +++++--- - .../debugger/linux/LinuxDebuggerLocal.java | 2 +- - .../hotspot/debugger/linux/LinuxThread.java | 10 +++++--- - .../windbg/amd64/WindbgAMD64Thread.java | 15 ++++++++---- - .../windows/native/libsaproc/sawindbg.cpp | 14 ++++++++--- - 9 files changed, 61 insertions(+), 27 deletions(-) - -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -index 45a927fb5ee..6f1887f8113 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -@@ -413,7 +413,13 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - - struct ps_prochandle* ph = get_proc_handle(env, this_obj); - if (get_lwp_regs(ph, lwp_id, &gregs) != true) { -- THROW_NEW_DEBUGGER_EXCEPTION_("get_thread_regs failed for a lwp", 0); -+ // This is not considered fatal and does happen on occassion, usually with an -+ // ESRCH error. The root cause is not fully understood, but by ignoring this error -+ // and returning NULL, stacking walking code will get null registers and fallback -+ // to using the "last java frame" if setup. -+ fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: get_lwp_regs failed for lwp (%d)\n", lwp_id); -+ fflush(stdout); -+ return NULL; - } - - #undef NPRGREG -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -index de5254d859e..691c3f6684a 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -@@ -144,7 +144,8 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - - #ifdef PTRACE_GETREGS_REQ - if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { -- print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp %d\n", pid); -+ print_debug("ptrace(PTRACE_GETREGS, ...) 
failed for lwp(%d) errno(%d) \"%s\"\n", pid, -+ errno, strerror(errno)); - return false; - } - return true; -diff --git a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m -index 18b8b4282fe..e46370a1f18 100644 ---- a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m -+++ b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m -@@ -685,7 +685,7 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo - JNIEnv *env, jobject this_obj, - jlong thread_id) - { -- print_debug("getThreadRegisterSet0 called\n"); -+ print_debug("getThreadIntegerRegisterSet0 called\n"); - - struct ps_prochandle* ph = get_proc_handle(env, this_obj); - if (ph != NULL && ph->core != NULL) { -@@ -705,7 +705,13 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo - result = thread_get_state(tid, HSDB_THREAD_STATE, (thread_state_t)&state, &count); - - if (result != KERN_SUCCESS) { -- print_error("getregs: thread_get_state(%d) failed (%d)\n", tid, result); -+ // This is not considered fatal. Unlike on Linux and Windows, we haven't seen a -+ // failure to get thread registers, but if it were to fail the response should -+ // be the same. By ignoring this error and returning NULL, stacking walking code -+ // will get null registers and fallback to using the "last java frame" if setup. -+ fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: thread_get_state failed (%d) for thread (%d)\n", -+ result, tid); -+ fflush(stdout); - return NULL; - } - -@@ -808,25 +814,25 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo - */ - JNIEXPORT jint JNICALL - Java_sun_jvm_hotspot_debugger_macosx_MacOSXDebuggerLocal_translateTID0( -- JNIEnv *env, jobject this_obj, jint tid) -+ JNIEnv *env, jobject this_obj, jint tid) - { - print_debug("translateTID0 called on tid = 0x%x\n", (int)tid); - - kern_return_t result; - thread_t foreign_tid, usable_tid; - mach_msg_type_name_t type; -- -+ - foreign_tid = tid; -- -+ - task_t gTask = getTask(env, this_obj); -- result = mach_port_extract_right(gTask, foreign_tid, -- MACH_MSG_TYPE_COPY_SEND, -+ result = mach_port_extract_right(gTask, foreign_tid, -+ MACH_MSG_TYPE_COPY_SEND, - &usable_tid, &type); - if (result != KERN_SUCCESS) - return -1; -- -+ - print_debug("translateTID0: 0x%x -> 0x%x\n", foreign_tid, usable_tid); -- -+ - return (jint) usable_tid; - } +@@ -42,6 +42,7 @@ package compiler.intrinsics.sha.cli; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java -index 655b450c3fc..d0557a7d254 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java -@@ -166,7 +166,7 @@ public WorkerThreadTask 
execute(WorkerThreadTask task) throws DebuggerException - } catch (InterruptedException x) {} - } - if (lastException != null) { -- throw new DebuggerException(lastException); -+ throw new DebuggerException(lastException.getMessage(), lastException); - } else { - return task; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java -index 0d637f30f14..c52d3a51d54 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java +@@ -54,6 +55,8 @@ public class TestUseSHA1IntrinsicsOptionOnUnsupportedCPU { + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java +index 3ed72bf0a9..8fb82ee453 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* -- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -67,8 +67,12 @@ public String toString() { - public ThreadContext getContext() throws IllegalThreadStateException { - long[] data = debugger.getThreadIntegerRegisterSet(unique_thread_id); - ThreadContext context = BsdThreadContextFactory.createThreadContext(debugger); -- for (int i = 0; i < data.length; i++) { -- context.setRegister(i, data[i]); -+ // null means we failed to get the register set for some reason. The caller -+ // is responsible for dealing with the set of null registers in that case. 
-+ if (data != null) { -+ for (int i = 0; i < data.length; i++) { -+ context.setRegister(i, data[i]); -+ } - } - return context; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java -index cb6712b58ee..6a0648f508a 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java -@@ -173,7 +173,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException - } catch (InterruptedException x) {} - } - if (lastException != null) { -- throw new DebuggerException(lastException); -+ throw new DebuggerException(lastException.getMessage(), lastException); - } else { - return task; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java -index 52307b9cdcf..3fe795d34bc 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +@@ -42,6 +42,7 @@ package compiler.intrinsics.sha.cli; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; + +@@ -54,6 +55,8 @@ public class TestUseSHA256IntrinsicsOptionOnUnsupportedCPU { + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java +index c05cf309da..aca32137ed 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* -- * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -73,8 +73,12 @@ public String toString() { - public ThreadContext getContext() throws IllegalThreadStateException { - long[] data = debugger.getThreadIntegerRegisterSet(lwp_id); - ThreadContext context = LinuxThreadContextFactory.createThreadContext(debugger); -- for (int i = 0; i < data.length; i++) { -- context.setRegister(i, data[i]); -+ // null means we failed to get the register set for some reason. The caller -+ // is responsible for dealing with the set of null registers in that case. -+ if (data != null) { -+ for (int i = 0; i < data.length; i++) { -+ context.setRegister(i, data[i]); -+ } - } - return context; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java -index ec5aea35e8c..377650a0a1c 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java +@@ -42,6 +42,7 @@ package compiler.intrinsics.sha.cli; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; + +@@ -54,6 +55,8 @@ public class TestUseSHA512IntrinsicsOptionOnUnsupportedCPU { + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java +index 58ce5366ba..8deac4f789 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java @@ -1,5 +1,5 @@ /* -- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -30,9 +30,9 @@ - - class WindbgAMD64Thread implements ThreadProxy { - private WindbgDebugger debugger; -- private long sysId; -+ private long sysId; // SystemID for Windows thread, stored in OSThread::_thread_id - private boolean gotID; -- private long id; -+ private long id; // ThreadID for Windows thread, returned by GetThreadIdBySystemId - - // The address argument must be the address of the OSThread::_thread_id - WindbgAMD64Thread(WindbgDebugger debugger, Address addr) { -@@ -50,8 +50,12 @@ class WindbgAMD64Thread implements ThreadProxy { - public ThreadContext getContext() throws IllegalThreadStateException { - long[] data = debugger.getThreadIntegerRegisterSet(getThreadID()); - WindbgAMD64ThreadContext context = new WindbgAMD64ThreadContext(debugger); -- for (int i = 0; i < data.length; i++) { -- context.setRegister(i, data[i]); -+ // null means we failed to get the register set for some reason. The caller -+ // is responsible for dealing with the set of null registers in that case. -+ if (data != null) { -+ for (int i = 0; i < data.length; i++) { -+ context.setRegister(i, data[i]); -+ } - } - return context; - } -@@ -86,6 +90,7 @@ public String toString() { - private long getThreadID() { - if (!gotID) { - id = debugger.getThreadIdFromSysId(sysId); -+ gotID = true; - } - - return id; -diff --git a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp -index 314cf69c957..e3b218b4dae 100644 ---- a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp -+++ b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp -@@ -45,6 +45,7 @@ - - #include - #include -+#include - - #define DEBUG_NO_IMPLEMENTATION - #include -@@ -765,9 +766,16 @@ JNIEXPORT jlong JNICALL Java_sun_jvm_hotspot_debugger_windbg_WindbgDebuggerLocal - CHECK_EXCEPTION_(0); - - ULONG id = 0; -- COM_VERIFY_OK_(ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id), -- "Windbg Error: GetThreadIdBySystemId failed!", 0); -- -+ HRESULT hr = ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id); -+ if (hr != S_OK) { -+ // This is not considered fatal and does happen on occassion, usually with an -+ // 0x80004002 "No such interface supported". The root cause is not fully understood, -+ // but by ignoring this error and returning NULL, stacking walking code will get -+ // null registers and fallback to using the "last java frame" if setup. 
-+ printf("WARNING: GetThreadIdBySystemId failed with 0x%x for sysId (%" PRIu64 ")\n", -+ hr, sysId); -+ return -1; -+ } - return (jlong) id; - } - - -From 2cadd133d25e05be6ab9b16024a37bed79af1f15 Mon Sep 17 00:00:00 2001 -From: Xiaolin Zheng -Date: Wed, 30 Mar 2022 09:04:55 +0000 -Subject: [PATCH 132/140] 8283737: riscv: MacroAssembler::stop() should emit - fixed-length instruction sequence - -Reviewed-by: fyang, shade ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index fd18bb77058..b72a553da2f 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -542,8 +542,11 @@ void MacroAssembler::resolve_jobject(Register value, Register thread, Register t - void MacroAssembler::stop(const char* msg) { - address ip = pc(); - pusha(); -- li(c_rarg0, (uintptr_t)(address)msg); -- li(c_rarg1, (uintptr_t)(address)ip); -+ // The length of the instruction sequence emitted should be independent -+ // of the values of msg and ip so that the size of mach nodes for scratch -+ // emit and normal emit matches. -+ mv(c_rarg0, (address)msg); -+ mv(c_rarg1, (address)ip); - mv(c_rarg2, sp); - mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); - jalr(c_rarg3); - -From 729e0db14cb320aedf1f12051e667513bddbb8e8 Mon Sep 17 00:00:00 2001 -From: Xiaolin Zheng -Date: Sun, 24 Apr 2022 02:17:03 +0000 -Subject: [PATCH 133/140] 8285437: riscv: Fix MachNode size mismatch for - MacroAssembler::verify_oops* - -Reviewed-by: shade, fyang ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index b72a553da2f..9f80f7e2650 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -389,7 +389,10 @@ void MacroAssembler::verify_oop(Register reg, const char* s) { - push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); - - mv(c_rarg0, reg); // c_rarg0 : x10 -- li(t0, (uintptr_t)(address)b); -+ // The length of the instruction sequence emitted should be independent -+ // of the values of the local char buffer address so that the size of mach -+ // nodes for scratch emit and normal emit matches. -+ mv(t0, (address)b); - - // call indirectly to solve generation ordering problem - int32_t offset = 0; -@@ -425,7 +428,10 @@ void MacroAssembler::verify_oop_addr(Address addr, const char* s) { - ld(x10, addr); - } - -- li(t0, (uintptr_t)(address)b); -+ // The length of the instruction sequence emitted should be independent -+ // of the values of the local char buffer address so that the size of mach -+ // nodes for scratch emit and normal emit matches. 
-+ mv(t0, (address)b); - - // call indirectly to solve generation ordering problem - int32_t offset = 0; - -From 5cab06c6f09f4b62d54d8d291b1a23f796a085c1 Mon Sep 17 00:00:00 2001 -From: Xiaolin Zheng -Date: Mon, 30 May 2022 07:45:50 +0000 -Subject: [PATCH 134/140] 8287418: riscv: Fix correctness issue of - MacroAssembler::movptr - -Reviewed-by: fjiang, yadongwang, fyang ---- - src/hotspot/cpu/riscv/assembler_riscv.cpp | 14 +++++++------- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 18 +++++++++--------- - src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 3 ++- - src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 2 +- - 4 files changed, 19 insertions(+), 18 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp -index f15ef5304c5..a5f688cda1f 100644 ---- a/src/hotspot/cpu/riscv/assembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp -@@ -282,9 +282,9 @@ void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { - } - #endif - assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (uintptr_t)-1), -- "48-bit overflow in address constant"); -- // Load upper 32 bits -- int32_t imm = imm64 >> 16; -+ "bit 47 overflows in address constant"); -+ // Load upper 31 bits -+ int32_t imm = imm64 >> 17; - int64_t upper = imm, lower = imm; - lower = (lower << 52) >> 52; - upper -= lower; -@@ -292,13 +292,13 @@ void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { - lui(Rd, upper); - addi(Rd, Rd, lower); - -- // Load the rest 16 bits. -+ // Load the rest 17 bits. - slli(Rd, Rd, 11); -- addi(Rd, Rd, (imm64 >> 5) & 0x7ff); -- slli(Rd, Rd, 5); -+ addi(Rd, Rd, (imm64 >> 6) & 0x7ff); -+ slli(Rd, Rd, 6); - - // This offset will be used by following jalr/ld. -- offset = imm64 & 0x1f; -+ offset = imm64 & 0x3f; - } - - void Assembler::movptr(Register Rd, uintptr_t imm64) { -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 9f80f7e2650..f592d7585da 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1158,12 +1158,12 @@ static int patch_offset_in_pc_relative(address branch, int64_t offset) { - - static int patch_addr_in_movptr(address branch, address target) { - const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load -- int32_t lower = ((intptr_t)target << 36) >> 36; -- int64_t upper = ((intptr_t)target - lower) >> 28; -- Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[47:28] + target[27] ==> branch[31:12] -- Assembler::patch(branch + 4, 31, 20, (lower >> 16) & 0xfff); // Addi. target[27:16] ==> branch[31:20] -- Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff); // Addi. target[15: 5] ==> branch[31:20] -- Assembler::patch(branch + 20, 31, 20, lower & 0x1f); // Addi/Jalr/Load. target[ 4: 0] ==> branch[31:20] -+ int32_t lower = ((intptr_t)target << 35) >> 35; -+ int64_t upper = ((intptr_t)target - lower) >> 29; -+ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12] -+ Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20] -+ Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20] -+ Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. 
target[ 5: 0] ==> branch[31:20] - return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; - } - -@@ -1235,9 +1235,9 @@ static long get_offset_of_pc_relative(address insn_addr) { - - static address get_target_of_movptr(address insn_addr) { - assert_cond(insn_addr != NULL); -- intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28; // Lui. -- target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16; // Addi. -- target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5; // Addi. -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 29; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 17; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 6; // Addi. - target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. - return (address) target_address; - } -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 45ffc663963..792c1fc2103 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -827,7 +827,8 @@ class MacroAssembler: public Assembler { - - // Return true if an address is within the 48-bit RISCV64 address space. - bool is_valid_riscv64_address(address addr) { -- return ((uintptr_t)addr >> 48) == 0; -+ // sv48: must have bits 63–48 all equal to bit 47 -+ return ((uintptr_t)addr >> 47) == 0; - } - - void ld_constant(Register dest, const Address &const_addr) { -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -index bfe84fa4e30..27011ad1283 100644 ---- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -@@ -89,7 +89,7 @@ bool NativeInstruction::is_movptr_at(address instr) { - is_addi_at(instr + instruction_size) && // Addi - is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 - is_addi_at(instr + instruction_size * 3) && // Addi -- is_slli_shift_at(instr + instruction_size * 4, 5) && // Slli Rd, Rs, 5 -+ is_slli_shift_at(instr + instruction_size * 4, 6) && // Slli Rd, Rs, 6 - (is_addi_at(instr + instruction_size * 5) || - is_jalr_at(instr + instruction_size * 5) || - is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load - -From 41d73298bf28473b3ba2483e61a39c188eddfde3 Mon Sep 17 00:00:00 2001 -From: Kuai Wei -Date: Fri, 22 Sep 2023 16:57:56 +0800 -Subject: [PATCH 135/140] Fix: Fixed-length mv() mistakenly redirected to li() - during reshaping - ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 6 ++++++ - src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 3 +-- - 2 files changed, 7 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index f592d7585da..f851cc1e413 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1343,6 +1343,12 @@ void MacroAssembler::mv(Register Rd, Address dest) { - movptr(Rd, dest.target()); - } - -+void MacroAssembler::mv(Register Rd, address addr) { -+ // Here in case of use with relocation, use fix length instruction -+ // movptr instead of li -+ movptr(Rd, addr); -+} -+ - void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { - if 
(src.is_register()) { - mv(Rd, src.as_register()); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 792c1fc2103..65f91532661 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -540,8 +540,6 @@ class MacroAssembler: public Assembler { - } - - // mv -- void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } -- - inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } - inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } - inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } -@@ -552,6 +550,7 @@ class MacroAssembler: public Assembler { - inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } - - void mv(Register Rd, Address dest); -+ void mv(Register Rd, address dest); - void mv(Register Rd, RegisterOrConstant src); - - // logic - -From 26f4b26a98507ec03a2329bfcbaab393247fe83f Mon Sep 17 00:00:00 2001 -From: Xiaolin Zheng -Date: Fri, 2 Sep 2022 07:01:02 +0000 -Subject: [PATCH 136/140] 8293100: RISC-V: Need to save and restore - callee-saved FloatRegisters in StubGenerator::generate_call_stub - -Reviewed-by: yadongwang, fjiang, shade, vkempik ---- - src/hotspot/cpu/riscv/frame_riscv.hpp | 2 +- - src/hotspot/cpu/riscv/riscv.ad | 18 ++--- - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 74 +++++++++++++++++-- - src/hotspot/cpu/riscv/vmreg_riscv.cpp | 2 +- - 4 files changed, 80 insertions(+), 16 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp -index 3b88f6d5a1a..18e021dcb94 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.hpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.hpp -@@ -131,7 +131,7 @@ - // Entry frames - // n.b. 
these values are determined by the layout defined in - // stubGenerator for the Java call stub -- entry_frame_after_call_words = 22, -+ entry_frame_after_call_words = 34, - entry_frame_call_wrapper_offset = -10, - - // we don't need a save area -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index e410bd06aa6..69696b272a5 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -8601,7 +8601,7 @@ instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST); -- format %{ "float_b$cmp $op1, $op2 \t#@cmpF_branch"%} -+ format %{ "float_b$cmp $op1, $op2, $lbl \t#@cmpF_branch"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); -@@ -8618,7 +8618,7 @@ instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST); -- format %{ "float_b$cmp $op1, $op2\t#@cmpF_loop"%} -+ format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); -@@ -8636,7 +8636,7 @@ instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST); -- format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%} -+ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -@@ -8654,7 +8654,7 @@ instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST); -- format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%} -+ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -@@ -8929,7 +8929,7 @@ instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{ - effect(USE lbl); - - ins_cost(BRANCH_COST); -- format %{ "far_b$cmp $cr, zr, L\t#@far_cmpFlag_branch"%} -+ format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%} - - ins_encode %{ - __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); -@@ -9138,7 +9138,7 @@ instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST * 2); -- format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_branch"%} -+ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -@@ -9154,7 +9154,7 @@ instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST * 2); -- format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_loop"%} -+ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -@@ -9171,7 +9171,7 @@ instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST * 2); -- format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%} -+ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, 
as_FloatRegister($op1$$reg), -@@ -9187,7 +9187,7 @@ instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST * 2); -- format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%} -+ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -index 74c38c3d044..9970229c5c5 100644 ---- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -118,16 +118,28 @@ class StubGenerator: public StubCodeGenerator { - // we don't need to save x6-x7 and x28-x31 which both C and Java treat as - // volatile - // -- // we save x18-x27 which Java uses as temporary registers and C -- // expects to be callee-save -+ // we save x9, x18-x27, f8-f9, and f18-f27 which Java uses as temporary -+ // registers and C expects to be callee-save - // - // so the stub frame looks like this when we enter Java code - // - // [ return_from_Java ] <--- sp - // [ argument word n ] - // ... -- // -22 [ argument word 1 ] -- // -21 [ saved x27 ] <--- sp_after_call -+ // -34 [ argument word 1 ] -+ // -33 [ saved f27 ] <--- sp_after_call -+ // -32 [ saved f26 ] -+ // -31 [ saved f25 ] -+ // -30 [ saved f24 ] -+ // -29 [ saved f23 ] -+ // -28 [ saved f22 ] -+ // -27 [ saved f21 ] -+ // -26 [ saved f20 ] -+ // -25 [ saved f19 ] -+ // -24 [ saved f18 ] -+ // -23 [ saved f9 ] -+ // -22 [ saved f8 ] -+ // -21 [ saved x27 ] - // -20 [ saved x26 ] - // -19 [ saved x25 ] - // -18 [ saved x24 ] -@@ -152,7 +164,20 @@ class StubGenerator: public StubCodeGenerator { - - // Call stub stack layout word offsets from fp - enum call_stub_layout { -- sp_after_call_off = -21, -+ sp_after_call_off = -33, -+ -+ f27_off = -33, -+ f26_off = -32, -+ f25_off = -31, -+ f24_off = -30, -+ f23_off = -29, -+ f22_off = -28, -+ f21_off = -27, -+ f20_off = -26, -+ f19_off = -25, -+ f18_off = -24, -+ f9_off = -23, -+ f8_off = -22, - - x27_off = -21, - x26_off = -20, -@@ -198,6 +223,19 @@ class StubGenerator: public StubCodeGenerator { - - const Address thread (fp, thread_off * wordSize); - -+ const Address f27_save (fp, f27_off * wordSize); -+ const Address f26_save (fp, f26_off * wordSize); -+ const Address f25_save (fp, f25_off * wordSize); -+ const Address f24_save (fp, f24_off * wordSize); -+ const Address f23_save (fp, f23_off * wordSize); -+ const Address f22_save (fp, f22_off * wordSize); -+ const Address f21_save (fp, f21_off * wordSize); -+ const Address f20_save (fp, f20_off * wordSize); -+ const Address f19_save (fp, f19_off * wordSize); -+ const Address f18_save (fp, f18_off * wordSize); -+ const Address f9_save (fp, f9_off * wordSize); -+ const Address f8_save (fp, f8_off * wordSize); -+ - const Address x27_save (fp, x27_off * wordSize); - const Address x26_save (fp, x26_off * wordSize); - const Address x25_save (fp, x25_off * wordSize); -@@ -244,6 +282,19 @@ class StubGenerator: public StubCodeGenerator { - __ sd(x26, x26_save); - __ sd(x27, x27_save); +@@ -41,6 +41,7 @@ package compiler.intrinsics.sha.cli; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import 
compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU; -+ __ fsd(f8, f8_save); -+ __ fsd(f9, f9_save); -+ __ fsd(f18, f18_save); -+ __ fsd(f19, f19_save); -+ __ fsd(f20, f20_save); -+ __ fsd(f21, f21_save); -+ __ fsd(f22, f22_save); -+ __ fsd(f23, f23_save); -+ __ fsd(f24, f24_save); -+ __ fsd(f25, f25_save); -+ __ fsd(f26, f26_save); -+ __ fsd(f27, f27_save); -+ - // install Java thread in global register now we have saved - // whatever value it held - __ mv(xthread, c_rarg7); -@@ -335,6 +386,19 @@ class StubGenerator: public StubCodeGenerator { - #endif +@@ -53,6 +54,8 @@ public class TestUseSHAOptionOnUnsupportedCPU { + SHAOptionsBase.USE_SHA_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA_OPTION), + new UseSHASpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +index faa9fdbae6..2663500204 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -32,26 +32,27 @@ import jdk.test.lib.cli.predicate.OrPredicate; - // restore callee-save registers -+ __ fld(f27, f27_save); -+ __ fld(f26, f26_save); -+ __ fld(f25, f25_save); -+ __ fld(f24, f24_save); -+ __ fld(f23, f23_save); -+ __ fld(f22, f22_save); -+ __ fld(f21, f21_save); -+ __ fld(f20, f20_save); -+ __ fld(f19, f19_save); -+ __ fld(f18, f18_save); -+ __ fld(f9, f9_save); -+ __ fld(f8, f8_save); -+ - __ ld(x27, x27_save); - __ ld(x26, x26_save); - __ ld(x25, x25_save); -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -index 5d1187c2a27..c4338715f95 100644 ---- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -@@ -40,7 +40,7 @@ void VMRegImpl::set_regName() { - FloatRegister freg = ::as_FloatRegister(0); - for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { - for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { -- regName[i++] = reg->name(); -+ regName[i++] = freg->name(); + /** + * Generic test case for SHA-related options targeted to any CPU except +- * AArch64, PPC, S390x, SPARC and X86. ++ * AArch64, RISCV64, PPC, S390x, SPARC and X86. + */ + public class GenericTestCaseForOtherCPU extends + SHAOptionsBase.TestCase { + public GenericTestCaseForOtherCPU(String optionName) { +- // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC and X86. ++ // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, SPARC and X86. 
+ super(optionName, new NotPredicate( + new OrPredicate(Platform::isAArch64, ++ new OrPredicate(Platform::isRISCV64, + new OrPredicate(Platform::isS390x, + new OrPredicate(Platform::isSparc, + new OrPredicate(Platform::isPPC, + new OrPredicate(Platform::isX64, +- Platform::isX86))))))); ++ Platform::isX86)))))))); } - freg = freg->successor(); - } - -From 69ea557c320ad7b2f35fc0e986af9b485f95addf Mon Sep 17 00:00:00 2001 -From: Xiaolin Zheng -Date: Fri, 28 Oct 2022 11:56:21 +0000 -Subject: [PATCH 137/140] 8295926: RISC-V: C1: Fix - LIRGenerator::do_LibmIntrinsic - -Reviewed-by: yadongwang, fyang ---- - .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 21 +++-- - .../floatingpoint/TestLibmIntrinsics.java | 80 +++++++++++++++++++ - 2 files changed, 96 insertions(+), 5 deletions(-) - create mode 100644 test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java - -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index f9242251491..c41819fc2ae 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -679,19 +679,30 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { - void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { - LIRItem value(x->argument_at(0), this); - value.set_destroys_register(); -+ - LIR_Opr calc_result = rlock_result(x); - LIR_Opr result_reg = result_register_for(x->type()); -+ - CallingConvention* cc = NULL; -- BasicTypeList signature(1); -- signature.append(T_DOUBLE); -- if (x->id() == vmIntrinsics::_dpow) { signature.append(T_DOUBLE); } -- cc = frame_map()->c_calling_convention(&signature); -- value.load_item_force(cc->at(0)); -+ - if (x->id() == vmIntrinsics::_dpow) { - LIRItem value1(x->argument_at(1), this); -+ - value1.set_destroys_register(); -+ -+ BasicTypeList signature(2); -+ signature.append(T_DOUBLE); -+ signature.append(T_DOUBLE); -+ cc = frame_map()->c_calling_convention(&signature); -+ value.load_item_force(cc->at(0)); - value1.load_item_force(cc->at(1)); -+ } else { -+ BasicTypeList signature(1); -+ signature.append(T_DOUBLE); -+ cc = frame_map()->c_calling_convention(&signature); -+ value.load_item_force(cc->at(0)); - } -+ - switch (x->id()) { - case vmIntrinsics::_dexp: - if (StubRoutines::dexp() != NULL) { __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); } -diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java + + @Override + protected void verifyWarnings() throws Throwable { + String shouldPassMessage = String.format("JVM should start with " + + "option '%s' without any warnings", optionName); +- // Verify that on non-x86, non-SPARC and non-AArch64 CPU usage of ++ // Verify that on non-x86, non-RISCV64, non-SPARC and non-AArch64 CPU usage of + // SHA-related options will not cause any warnings. 
+ CommandLineOptionTest.verifySameJVMStartup(null, + new String[] { ".*" + optionName + ".*" }, shouldPassMessage, +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java new file mode 100644 -index 00000000000..5c711efddea +index 0000000000..8566d57c39 --- /dev/null -+++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java -@@ -0,0 +1,80 @@ ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java +@@ -0,0 +1,115 @@ +/* -+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Alibaba Group Holding Limited. All rights reserved. ++ * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -82025,422 +56882,462 @@ index 00000000000..5c711efddea + * questions. + */ + -+/* -+ * @test -+ * @summary Test libm intrinsics -+ * @library /test/lib / -+ * -+ * @build jdk.test.whitebox.WhiteBox -+ * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox -+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -+ * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement -+ * compiler.floatingpoint.TestLibmIntrinsics -+ */ -+ -+package compiler.floatingpoint; -+ -+import compiler.whitebox.CompilerWhiteBoxTest; -+import jdk.test.whitebox.WhiteBox; -+ -+import java.lang.reflect.Method; -+ -+public class TestLibmIntrinsics { ++package compiler.intrinsics.sha.cli.testcases; + -+ private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); ++import compiler.intrinsics.sha.cli.SHAOptionsBase; ++import jdk.test.lib.process.ExitCode; ++import jdk.test.lib.Platform; ++import jdk.test.lib.cli.CommandLineOptionTest; ++import jdk.test.lib.cli.predicate.AndPredicate; ++import jdk.test.lib.cli.predicate.NotPredicate; + -+ private static final double pi = 3.1415926; ++/** ++ * Generic test case for SHA-related options targeted to RISCV64 CPUs ++ * which don't support instruction required by the tested option. 
++ */ ++public class GenericTestCaseForUnsupportedRISCV64CPU extends ++ SHAOptionsBase.TestCase { + -+ private static final double expected = 2.5355263553695413; ++ final private boolean checkUseSHA; + -+ static double m() { -+ return Math.pow(pi, Math.sin(Math.cos(Math.tan(Math.log(Math.log10(Math.exp(pi))))))); ++ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { ++ this(optionName, true); + } + -+ static public void main(String[] args) throws NoSuchMethodException { -+ Method test_method = compiler.floatingpoint.TestLibmIntrinsics.class.getDeclaredMethod("m"); ++ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { ++ super(optionName, new AndPredicate(Platform::isRISCV64, ++ new NotPredicate(SHAOptionsBase.getPredicateForOption( ++ optionName)))); + -+ double interpreter_result = m(); ++ this.checkUseSHA = checkUseSHA; ++ } + -+ // Compile with C1 if possible -+ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE); ++ @Override ++ protected void verifyWarnings() throws Throwable { ++ String shouldPassMessage = String.format("JVM startup should pass with" ++ + "option '-XX:-%s' without any warnings", optionName); ++ //Verify that option could be disabled without any warnings. ++ CommandLineOptionTest.verifySameJVMStartup(null, new String[] { ++ SHAOptionsBase.getWarningForUnsupportedCPU(optionName) ++ }, shouldPassMessage, shouldPassMessage, ExitCode.OK, ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + -+ double c1_result = m(); ++ if (checkUseSHA) { ++ shouldPassMessage = String.format("If JVM is started with '-XX:-" ++ + "%s' '-XX:+%s', output should contain warning.", ++ SHAOptionsBase.USE_SHA_OPTION, optionName); + -+ WHITE_BOX.deoptimizeMethod(test_method); ++ // Verify that when the tested option is enabled, then ++ // a warning will occur in VM output if UseSHA is disabled. ++ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { ++ CommandLineOptionTest.verifySameJVMStartup( ++ new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, ++ null, ++ shouldPassMessage, ++ shouldPassMessage, ++ ExitCode.OK, ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), ++ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); ++ } ++ } ++ } + -+ // Compile it with C2 if possible -+ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION); ++ @Override ++ protected void verifyOptionValues() throws Throwable { ++ // Verify that option is disabled by default. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be disabled by default", ++ optionName), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); + -+ double c2_result = m(); ++ if (checkUseSHA) { ++ // Verify that option is disabled even if it was explicitly enabled ++ // using CLI options. 
++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be off on unsupported " ++ + "RISCV64CPU even if set to true directly", optionName), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + -+ if (interpreter_result != c1_result || -+ interpreter_result != c2_result || -+ c1_result != c2_result) { -+ System.out.println("interpreter = " + interpreter_result + " c1 = " + c1_result + " c2 = " + c2_result); -+ throw new RuntimeException("Test Failed"); ++ // Verify that option is disabled when +UseSHA was passed to JVM. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be off on unsupported " ++ + "RISCV64CPU even if %s flag set to JVM", ++ optionName, CommandLineOptionTest.prepareBooleanFlag( ++ SHAOptionsBase.USE_SHA_OPTION, true)), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag( ++ SHAOptionsBase.USE_SHA_OPTION, true)); + } + } +} - -From ec57f23aa4001315a030cacd55aa5ef7c3269fbb Mon Sep 17 00:00:00 2001 -From: Kuai Wei -Date: Mon, 9 Oct 2023 11:07:34 +0800 -Subject: [PATCH 138/140] Fix test error after port 8295926 - ---- - .../jtreg/compiler/floatingpoint/TestLibmIntrinsics.java | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java -index 5c711efddea..5a1b659bbe0 100644 ---- a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java -+++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java -@@ -27,8 +27,8 @@ - * @summary Test libm intrinsics - * @library /test/lib / +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +index 2e3e2717a6..7be8af6d03 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * -- * @build jdk.test.whitebox.WhiteBox -- * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox -+ * @build sun.hotspot.WhiteBox -+ * @run driver ClassFileInstaller sun.hotspot.WhiteBox - * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI - * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement - * compiler.floatingpoint.TestLibmIntrinsics -@@ -37,7 +37,7 @@ - package compiler.floatingpoint; - - import compiler.whitebox.CompilerWhiteBoxTest; --import jdk.test.whitebox.WhiteBox; -+import sun.hotspot.WhiteBox; - - import java.lang.reflect.Method; - - -From b115ec4381ad3ad8cbe9ca3d225cb438538916ac Mon Sep 17 00:00:00 2001 -From: Kuai Wei -Date: Tue, 17 Oct 2023 14:22:49 +0800 -Subject: [PATCH 139/140] Revert JDK-8247533: SA stack walking sometimes fails - with sun.jvm.hotspot.debugger.DebuggerException: get_thread_regs failed for a - lwp - ---- - .../native/libsaproc/LinuxDebuggerLocal.c | 8 +------ - .../linux/native/libsaproc/ps_proc.c | 3 +-- - .../native/libsaproc/MacosxDebuggerLocal.m | 24 +++++++------------ - .../debugger/bsd/BsdDebuggerLocal.java | 2 +- - .../jvm/hotspot/debugger/bsd/BsdThread.java | 10 +++----- - .../debugger/linux/LinuxDebuggerLocal.java | 2 +- - .../hotspot/debugger/linux/LinuxThread.java | 10 +++----- - .../windbg/amd64/WindbgAMD64Thread.java | 15 ++++-------- - .../windows/native/libsaproc/sawindbg.cpp | 14 +++-------- - 9 files changed, 27 insertions(+), 61 deletions(-) - -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -index 6f1887f8113..45a927fb5ee 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -@@ -413,13 +413,7 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - - struct ps_prochandle* ph = get_proc_handle(env, this_obj); - if (get_lwp_regs(ph, lwp_id, &gregs) != true) { -- // This is not considered fatal and does happen on occassion, usually with an -- // ESRCH error. The root cause is not fully understood, but by ignoring this error -- // and returning NULL, stacking walking code will get null registers and fallback -- // to using the "last java frame" if setup. 
-- fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: get_lwp_regs failed for lwp (%d)\n", lwp_id); -- fflush(stdout); -- return NULL; -+ THROW_NEW_DEBUGGER_EXCEPTION_("get_thread_regs failed for a lwp", 0); - } - - #undef NPRGREG -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -index 691c3f6684a..de5254d859e 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -@@ -144,8 +144,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +index 0e06a9e432..797927b42b 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +index c3cdbf3746..be8f7d586c 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +index d33bd411f1..d96d5e29c0 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions + * -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +index 992fa4b516..b09c873d05 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8138583 + * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : double abs & neg test +- * @requires os.arch=="aarch64" ++ * @requires os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +index 3e79b3528b..fe40ed6f98 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8138583 + * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : float abs & neg test +- * @requires os.arch=="aarch64" ++ * @requires os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +index 6603dd224e..5163191049 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8135028 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +index d9a0c98800..d999ae423c 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +index 722db95aed..65912a5c7f 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : float test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +index f58f21feb2..fffdc2f756 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : int test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +index acb86812d2..2c866f26f0 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +@@ -24,7 +24,7 @@ - #ifdef PTRACE_GETREGS_REQ - if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { -- print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp(%d) errno(%d) \"%s\"\n", pid, -- errno, strerror(errno)); -+ print_debug("ptrace(PTRACE_GETREGS, ...) 
failed for lwp %d\n", pid); - return false; - } - return true; -diff --git a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m -index e46370a1f18..18b8b4282fe 100644 ---- a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m -+++ b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m -@@ -685,7 +685,7 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo - JNIEnv *env, jobject this_obj, - jlong thread_id) - { -- print_debug("getThreadIntegerRegisterSet0 called\n"); -+ print_debug("getThreadRegisterSet0 called\n"); + /* @test + * @bug 8167409 +- * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs + */ + package compiler.runtime.criticalnatives.argumentcorruption; +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +index eab36f9311..1da369fde2 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +@@ -24,7 +24,7 @@ - struct ps_prochandle* ph = get_proc_handle(env, this_obj); - if (ph != NULL && ph->core != NULL) { -@@ -705,13 +705,7 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo - result = thread_get_state(tid, HSDB_THREAD_STATE, (thread_state_t)&state, &count); + /* @test + * @bug 8167408 +- * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp + */ + package compiler.runtime.criticalnatives.lookup; +diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +index 7774dabcb5..7afe3560f3 100644 +--- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java ++++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -61,15 +61,17 @@ public class IntrinsicPredicates { - if (result != KERN_SUCCESS) { -- // This is not considered fatal. Unlike on Linux and Windows, we haven't seen a -- // failure to get thread registers, but if it were to fail the response should -- // be the same. By ignoring this error and returning NULL, stacking walking code -- // will get null registers and fallback to using the "last java frame" if setup. 
-- fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: thread_get_state failed (%d) for thread (%d)\n", -- result, tid); -- fflush(stdout); -+ print_error("getregs: thread_get_state(%d) failed (%d)\n", tid, result); - return NULL; - } + public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), +- new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); ++ new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))))); -@@ -814,25 +808,25 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo - */ - JNIEXPORT jint JNICALL - Java_sun_jvm_hotspot_debugger_macosx_MacOSXDebuggerLocal_translateTID0( -- JNIEnv *env, jobject this_obj, jint tid) -+ JNIEnv *env, jobject this_obj, jint tid) - { - print_debug("translateTID0 called on tid = 0x%x\n", (int)tid); + public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), +@@ -79,10 +81,11 @@ public class IntrinsicPredicates { + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), +- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); ++ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); - kern_return_t result; - thread_t foreign_tid, usable_tid; - mach_msg_type_name_t type; -- -+ - foreign_tid = tid; -- -+ - task_t gTask = getTask(env, this_obj); -- result = mach_port_extract_right(gTask, foreign_tid, -- MACH_MSG_TYPE_COPY_SEND, -+ result = mach_port_extract_right(gTask, foreign_tid, -+ MACH_MSG_TYPE_COPY_SEND, - &usable_tid, &type); - if (result != KERN_SUCCESS) - return -1; -- -+ - print_debug("translateTID0: 0x%x -> 0x%x\n", foreign_tid, usable_tid); -- -+ - return (jint) usable_tid; - } + public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha512" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha512" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha512" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), +@@ -92,7 +95,7 @@ public class IntrinsicPredicates { + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), + new OrPredicate(new 
CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), +- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); ++ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java -index d0557a7d254..655b450c3fc 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java -@@ -166,7 +166,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException - } catch (InterruptedException x) {} - } - if (lastException != null) { -- throw new DebuggerException(lastException.getMessage(), lastException); -+ throw new DebuggerException(lastException); - } else { - return task; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java -index c52d3a51d54..0d637f30f14 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java + public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE + = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE, +diff --git a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java +index 57256aa5a3..d4d43b01ae 100644 +--- a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java ++++ b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java @@ -1,5 +1,5 @@ /* -- * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -67,12 +67,8 @@ public String toString() { - public ThreadContext getContext() throws IllegalThreadStateException { - long[] data = debugger.getThreadIntegerRegisterSet(unique_thread_id); - ThreadContext context = BsdThreadContextFactory.createThreadContext(debugger); -- // null means we failed to get the register set for some reason. The caller -- // is responsible for dealing with the set of null registers in that case. 
-- if (data != null) { -- for (int i = 0; i < data.length; i++) { -- context.setRegister(i, data[i]); -- } -+ for (int i = 0; i < data.length; i++) { -+ context.setRegister(i, data[i]); - } - return context; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java -index 6a0648f508a..cb6712b58ee 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java -@@ -173,7 +173,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException - } catch (InterruptedException x) {} - } - if (lastException != null) { -- throw new DebuggerException(lastException.getMessage(), lastException); -+ throw new DebuggerException(lastException); - } else { - return task; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java -index 3fe795d34bc..52307b9cdcf 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +@@ -112,7 +112,7 @@ public class CheckForProperDetailStackTrace { + // It's ok for ARM not to have symbols, because it does not support NMT detail + // when targeting thumb2. It's also ok for Windows not to have symbols, because + // they are only available if the symbols file is included with the build. +- if (Platform.isWindows() || Platform.isARM()) { ++ if (Platform.isWindows() || Platform.isARM() || Platform.isRISCV64()) { + return; // we are done + } + output.reportDiagnosticSummary(); +diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +index 127bb6abcd..eab19273ad 100644 +--- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java ++++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java @@ -1,5 +1,5 @@ /* -- * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -73,12 +73,8 @@ public String toString() { - public ThreadContext getContext() throws IllegalThreadStateException { - long[] data = debugger.getThreadIntegerRegisterSet(lwp_id); - ThreadContext context = LinuxThreadContextFactory.createThreadContext(debugger); -- // null means we failed to get the register set for some reason. The caller -- // is responsible for dealing with the set of null registers in that case. 
-- if (data != null) { -- for (int i = 0; i < data.length; i++) { -- context.setRegister(i, data[i]); -- } -+ for (int i = 0; i < data.length; i++) { -+ context.setRegister(i, data[i]); - } - return context; +@@ -239,7 +239,7 @@ public class ReservedStackTest { + return Platform.isAix() || + (Platform.isLinux() && + (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || +- Platform.isX86())) || ++ Platform.isX86() || Platform.isRISCV64())) || + Platform.isOSX() || + Platform.isSolaris(); } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java -index 377650a0a1c..ec5aea35e8c 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java +diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +index 126a43a900..feb4de5388 100644 +--- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java ++++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +@@ -45,7 +45,7 @@ import java.util.Set; + */ + public class TestMutuallyExclusivePlatformPredicates { + private static enum MethodGroup { +- ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), ++ ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), + BITNESS("is32bit", "is64bit"), + OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), + VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), +diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +index 7990c49a1f..abeff80e5e 100644 +--- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java ++++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java @@ -1,5 +1,5 @@ /* -- * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -30,9 +30,9 @@ - - class WindbgAMD64Thread implements ThreadProxy { - private WindbgDebugger debugger; -- private long sysId; // SystemID for Windows thread, stored in OSThread::_thread_id -+ private long sysId; - private boolean gotID; -- private long id; // ThreadID for Windows thread, returned by GetThreadIdBySystemId -+ private long id; - - // The address argument must be the address of the OSThread::_thread_id - WindbgAMD64Thread(WindbgDebugger debugger, Address addr) { -@@ -50,12 +50,8 @@ class WindbgAMD64Thread implements ThreadProxy { - public ThreadContext getContext() throws IllegalThreadStateException { - long[] data = debugger.getThreadIntegerRegisterSet(getThreadID()); - WindbgAMD64ThreadContext context = new WindbgAMD64ThreadContext(debugger); -- // null means we failed to get the register set for some reason. The caller -- // is responsible for dealing with the set of null registers in that case. 
-- if (data != null) { -- for (int i = 0; i < data.length; i++) { -- context.setRegister(i, data[i]); -- } -+ for (int i = 0; i < data.length; i++) { -+ context.setRegister(i, data[i]); - } - return context; - } -@@ -90,7 +86,6 @@ public String toString() { - private long getThreadID() { - if (!gotID) { - id = debugger.getThreadIdFromSysId(sysId); -- gotID = true; +@@ -54,8 +54,8 @@ public class TestCPUInformation { + Events.assertField(event, "hwThreads").atLeast(1); + Events.assertField(event, "cores").atLeast(1); + Events.assertField(event, "sockets").atLeast(1); +- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); +- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); ++ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); ++ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); + } } - - return id; -diff --git a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp -index e3b218b4dae..314cf69c957 100644 ---- a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp -+++ b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp -@@ -45,7 +45,6 @@ - - #include - #include --#include - - #define DEBUG_NO_IMPLEMENTATION - #include -@@ -766,16 +765,9 @@ JNIEXPORT jlong JNICALL Java_sun_jvm_hotspot_debugger_windbg_WindbgDebuggerLocal - CHECK_EXCEPTION_(0); - - ULONG id = 0; -- HRESULT hr = ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id); -- if (hr != S_OK) { -- // This is not considered fatal and does happen on occassion, usually with an -- // 0x80004002 "No such interface supported". The root cause is not fully understood, -- // but by ignoring this error and returning NULL, stacking walking code will get -- // null registers and fallback to using the "last java frame" if setup. -- printf("WARNING: GetThreadIdBySystemId failed with 0x%x for sysId (%" PRIu64 ")\n", -- hr, sysId); -- return -1; -- } -+ COM_VERIFY_OK_(ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id), -+ "Windbg Error: GetThreadIdBySystemId failed!", 0); -+ - return (jlong) id; } +diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java +index 6269373c2b..e1511772e7 100644 +--- a/test/lib/jdk/test/lib/Platform.java ++++ b/test/lib/jdk/test/lib/Platform.java +@@ -205,6 +205,10 @@ public class Platform { + return isArch("arm.*"); + } - -From 4b01e13731fc330ca3d57a5cd532c91bc66579c8 Mon Sep 17 00:00:00 2001 -From: Kuai Wei -Date: Wed, 31 Jan 2024 17:26:31 +0800 -Subject: [PATCH 140/140] Remove unused zSyscall_linux_riscv.hpp - ---- - .../linux_riscv/gc/z/zSyscall_linux_riscv.hpp | 42 ------------------- - 1 file changed, 42 deletions(-) - delete mode 100644 src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp - -diff --git a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp -deleted file mode 100644 -index 1aa58f27871..00000000000 ---- a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp -+++ /dev/null -@@ -1,42 +0,0 @@ --/* -- * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. 
All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#ifndef OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP --#define OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP -- --#include -- --// --// Support for building on older Linux systems --// -- --#ifndef SYS_memfd_create --#define SYS_memfd_create 279 --#endif --#ifndef SYS_fallocate --#define SYS_fallocate 47 --#endif -- --#endif // OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP ++ public static boolean isRISCV64() { ++ return isArch("riscv64"); ++ } ++ + public static boolean isPPC() { + return isArch("ppc.*"); + } diff --git a/LoongArch64-support.patch b/LoongArch64-support.patch index bf78938519963d04f67592ed50d962f0cee255e9..9c2bb8354556a1dcd1cbf01de61e1125db177f12 100644 --- a/LoongArch64-support.patch +++ b/LoongArch64-support.patch @@ -38046,13 +38046,13 @@ index 0000000000..49302590c3 +#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp new file mode 100644 -index 0000000000..3ed4c36651 +index 0000000000..6e27a69747 --- /dev/null +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp -@@ -0,0 +1,1625 @@ +@@ -0,0 +1,1626 @@ +/* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) -+ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * Copyright (c) 2022, 2024, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -38951,7 +38951,7 @@ index 0000000000..3ed4c36651 + b(Q_DONE); + bind(JX_IS_0); + if (UseLASX) { -+ xvfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ xvfmul_d(v28, v18, v6); // f[0,3] * x[0] + fmul_d(v30, v19, v6); // f[4] * x[0] + } else { + vfmul_d(v28, v18, v6); // f[0,1] * x[0] @@ -39180,6 +39180,7 @@ index 0000000000..3ed4c36651 + st_w(tmp2, SCR2, 0); + addi_w(SCR1, SCR1, 24); + addi_w(jz, jz, 1); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); + st_w(tmp3, SCR2, 0); // iq[jz] = (int) fw + b(Z_ZERO_CHECK_DONE); + bind(Z_IS_LESS_THAN_TWO24B); @@ -104792,7 +104793,7 @@ index 3687754e71..791e4ed43f 100644 void generate_c1_load_barrier_stub(LIR_Assembler* ce, ZLoadBarrierStubC1* stub) const; diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp -index 0af357ea56..66a8006780 100644 +index 2842a11f92..4f58ec4be3 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp @@ -23,6 +23,12 @@ @@ -104808,7 +104809,7 @@ index 0af357ea56..66a8006780 100644 // no precompiled headers #include "jvm.h" #include "classfile/classLoader.hpp" -@@ -4068,6 +4074,8 @@ size_t os::Linux::find_large_page_size() { +@@ -4060,6 +4066,8 @@ size_t os::Linux::find_large_page_size() { IA64_ONLY(256 * M) PPC_ONLY(4 * M) S390_ONLY(1 * M) @@ -110338,7 +110339,7 @@ index ce23aafa8f..d3dfb74d5b 100644 assert(_owner != Self, "invariant"); assert(_Responsible != Self, "invariant"); diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp -index e0f4a2af1f..09cc4b1ba5 100644 +index 1c540bb621..0e44240d40 100644 --- a/src/hotspot/share/runtime/os.cpp +++ b/src/hotspot/share/runtime/os.cpp @@ -22,6 +22,12 @@ @@ -110672,7 +110673,7 @@ index 8318e8e021..07064e76ee 100644 // This C bool type must be int for compatibility with Linux calls and // it would be a mistake to equivalence it to C++ bool on many platforms diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -index de5254d859..eefe55959c 100644 +index c22b5d1cb3..36d6343960 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c @@ -22,6 +22,12 @@ @@ -110688,12 +110689,12 @@ index de5254d859..eefe55959c 100644 #include #include #include -@@ -142,7 +148,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - #define PTRACE_GETREGS_REQ PT_GETREGS - #endif - --#ifdef PTRACE_GETREGS_REQ -+#if defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) +@@ -151,7 +157,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + return false; + } + return true; +-#elif defined(PTRACE_GETREGS_REQ) ++#elif defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { print_debug("ptrace(PTRACE_GETREGS, ...) 
failed for lwp %d\n", pid); return false; @@ -116585,7 +116586,7 @@ index 127bb6abcd..c9277604ae 100644 Platform.isSolaris(); } diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -index 77458554b7..05aee6b84c 100644 +index 126a43a900..55bd135f6e 100644 --- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java @@ -45,7 +45,7 @@ import java.util.Set; @@ -116625,35 +116626,8 @@ index 7990c49a1f..025048c6b0 100644 } } } -diff --git a/test/jdk/sun/security/pkcs11/PKCS11Test.java b/test/jdk/sun/security/pkcs11/PKCS11Test.java -index b14daf6c6d..da33514c75 100644 ---- a/test/jdk/sun/security/pkcs11/PKCS11Test.java -+++ b/test/jdk/sun/security/pkcs11/PKCS11Test.java -@@ -21,6 +21,12 @@ - * questions. - */ - -+/* -+ * This file has been modified by Loongson Technology in 2022, These -+ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made -+ * available on the same license terms set forth above. -+ */ -+ - // common infrastructure for SunPKCS11 tests - - import java.io.BufferedReader; -@@ -747,6 +753,9 @@ public abstract class PKCS11Test { - "/usr/lib64/" }); - osMap.put("Linux-ppc64-64", new String[] { "/usr/lib64/" }); - osMap.put("Linux-ppc64le-64", new String[] { "/usr/lib64/" }); -+ osMap.put("Linux-mips64el-64", new String[]{"/usr/lib64/"}); -+ osMap.put("Linux-loongarch64-64", new String[]{"/usr/lib/loongarch64-linux-gnu/", -+ "/usr/lib64/" }); - osMap.put("Linux-s390x-64", new String[] { "/usr/lib64/" }); - osMap.put("Windows-x86-32", new String[] {}); - osMap.put("Windows-amd64-64", new String[] {}); diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java -index 5b3f1889cb..aaf8867a7c 100644 +index 6269373c2b..440ec4664f 100644 --- a/test/lib/jdk/test/lib/Platform.java +++ b/test/lib/jdk/test/lib/Platform.java @@ -21,6 +21,12 @@ @@ -116668,8 +116642,8 @@ index 5b3f1889cb..aaf8867a7c 100644 + package jdk.test.lib; - import java.io.FileNotFoundException; -@@ -226,6 +232,14 @@ public class Platform { + import java.io.BufferedReader; +@@ -229,6 +235,14 @@ public class Platform { return isArch("(i386)|(x86(?!_64))"); } diff --git a/delete_expired_certificates.patch b/delete_expired_certificates.patch index 3e654b7774f145bca6cee0a764b16d15dd779cde..04e75c0b71888621ca579be0cc487ee38509d23f 100644 --- a/delete_expired_certificates.patch +++ b/delete_expired_certificates.patch @@ -116,7 +116,7 @@ diff --git a/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java b/test/jdk/sun index 122a01901..c131bd493 100644 --- a/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java +++ b/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java -@@ -47,12 +47,12 @@ public class VerifyCACerts { +@@ -48,12 +48,12 @@ public class VerifyCACerts { + File.separator + "security" + File.separator + "cacerts"; // The numbers of certs now. 
@@ -131,8 +131,7 @@ index 122a01901..c131bd493 100644 // map of cert alias to SHA-256 fingerprint @SuppressWarnings("serial") - private static final Map FINGERPRINT_MAP = new HashMap<>() { -@@ -109,8 +109,6 @@ public class VerifyCACerts { +@@ -111,8 +111,6 @@ public class VerifyCACerts { "7E:37:CB:8B:4C:47:09:0C:AB:36:55:1B:A6:F4:5D:B8:40:68:0F:BA:16:6A:95:2D:B1:00:71:7F:43:05:3F:C2"); put("digicerthighassuranceevrootca [jdk]", "74:31:E5:F4:C3:C1:CE:46:90:77:4F:0B:61:E0:54:40:88:3B:A9:A0:1E:D0:0B:A6:AB:D7:80:6E:D3:B1:18:CF"); @@ -141,7 +140,7 @@ index 122a01901..c131bd493 100644 put("geotrustprimaryca [jdk]", "37:D5:10:06:C5:12:EA:AB:62:64:21:F1:EC:8C:92:01:3F:C5:F8:2A:E9:8E:E5:33:EB:46:19:B8:DE:B4:D0:6C"); put("geotrustprimarycag2 [jdk]", -@@ -145,10 +143,6 @@ public class VerifyCACerts { +@@ -147,10 +145,6 @@ public class VerifyCACerts { "96:BC:EC:06:26:49:76:F3:74:60:77:9A:CF:28:C5:A7:CF:E8:A3:C0:AA:E1:1A:8F:FC:EE:05:C0:BD:DF:08:C6"); put("letsencryptisrgx2 [jdk]", "69:72:9B:8E:15:A8:6E:FC:17:7A:57:AF:B7:17:1D:FC:64:AD:D2:8C:2F:CA:8C:F1:50:7E:34:45:3C:CB:14:70"); @@ -152,7 +151,7 @@ index 122a01901..c131bd493 100644 put("quovadisrootca1g3 [jdk]", "8A:86:6F:D1:B2:76:B5:7E:57:8E:92:1C:65:82:8A:2B:ED:58:E9:F2:F2:88:05:41:34:B7:F1:F4:BF:C9:CC:74"); put("quovadisrootca2 [jdk]", -@@ -282,12 +276,6 @@ public class VerifyCACerts { +@@ -292,12 +286,6 @@ public class VerifyCACerts { add("addtrustexternalca [jdk]"); // Valid until: Sat May 30 10:44:50 GMT 2020 add("addtrustqualifiedca [jdk]"); diff --git a/openjdk-11.spec b/openjdk-11.spec index 3b4b978d0117d460220a5e5c47b7bc8c5c5cf565..9c7776aac3f57a4f1d083ed71dafd10f62d691b9 100644 --- a/openjdk-11.spec +++ b/openjdk-11.spec @@ -762,7 +762,7 @@ Provides: java-src%{?1} = %{epoch}:%{version}-%{release} Name: java-%{javaver}-%{origin} Version: %{newjavaver}.%{buildver} -Release: 0 +Release: 6 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons # and this change was brought into RHEL-4. java-1.5.0-ibm packages # also included the epoch in their virtual provides. 
This created a @@ -791,7 +791,7 @@ Group: Development/Languages # The test code includes copies of NSS under the Mozilla Public License v2.0 # The PCSClite headers are under a BSD with advertising license # The elliptic curve cryptography (ECC) source code is licensed under the LGPLv2.1 or any later version -License: ASL 1.1 and ASL 2.0 and BSD and BSD with advertising and GPL+ and GPLv2 and GPLv2 with exceptions and IJG and LGPLv2+ and MIT and MPLv2.0 and Public Domain and W3C and zlib and ISC and FTL and RSA +License: ASL 1.1 and ASL 2.0 and BSD and BSD with advertising and GPL+ and GPLv2 and GPLv2 with exceptions and IJG and LGPLv2+ and MIT and MPLv2.0 and Public Domain and W3C and zlib and ISC and FTL and RSA-MD URL: http://openjdk.java.net/ @@ -1588,9 +1588,10 @@ else end end -- run content of included file with fake args +arg = nil; -- it is better to null the arg up, no meter if they exists or not, and use cjc as module in unified way, instead of relaying on "main" method during require "copy_jdk_configs.lua" cjc = require "copy_jdk_configs.lua" -arg = {"--currentjvm", "%{uniquesuffix %{nil}}", "--jvmdir", "%{_jvmdir %{nil}}", "--origname", "%{name}", "--origjavaver", "%{javaver}", "--arch", "%{_arch}", "--temp", "%{rpm_state_dir}/%{name}.%{_arch}"} -cjc.mainProgram(arg) +args = {"--currentjvm", "%{uniquesuffix %{nil}}", "--jvmdir", "%{_jvmdir %{nil}}", "--origname", "%{name}", "--origjavaver", "%{javaver}", "--arch", "%{_arch}", "--temp", "%{rpm_state_dir}/%{name}.%{_arch}"} +cjc.mainProgram(args) -- the returns from copy_jdk_configs.lua should not affect this 'main', so it should run under all circumstances, except fatal error %post %{post_script %{nil}} @@ -1715,7 +1716,25 @@ cjc.mainProgram(arg) %changelog -* Thu July 18 2024 DXwangg - 1.11.0.24.8-0 +* Fri Aug 30 2024 songliyang - 1.11.0.24.8-6 +- update License + +* Thu Aug 1 2024 aoqi - 1.11.0.24.8-5 +- update LoongArch64 port to 11.0.24 + +* Thu July 29 2024 DXwangg - 1.11.0.24.8-4 +- modified delete_expired_certificates.patch + +* Thu Jul 25 2024 songliyang - 1.11.0.24.8-3 +- update Loongarch support patch to fix the error while applying in prep stage + +* Tue Jul 23 2024 songliyang - 1.11.0.24.8-2 +- null the arg to solve openjdk-headless install error + +* Thu Jul 18 2024 Dingli Zhang - 1.11.0.24.8-1 +- update riscv64 port to 11.0.24 + +* Thu Jul 18 2024 DXwangg - 1.11.0.24.8-0 - update to 11.0.24+8(GA) * Thu Jun 20 2024 aoqi - 1.11.0.23.9-6